diff --git "a/results.json" "b/results.json" --- "a/results.json" +++ "b/results.json" @@ -104,7 +104,7 @@ "family": "Indo-European", "flores_path": "por_Latn", "fleurs_tag": "pt_br", - "commonvoice_hours": 177.0, + "commonvoice_hours": 178.0, "commonvoice_locale": "pt", "in_benchmark": true }, @@ -380,7 +380,7 @@ "family": "Indo-European", "flores_path": null, "fleurs_tag": "ps_af", - "commonvoice_hours": 80.0, + "commonvoice_hours": 81.0, "commonvoice_locale": "ps", "in_benchmark": false }, @@ -620,7 +620,7 @@ "family": "Indo-European", "flores_path": "nld_Latn", "fleurs_tag": "nl_nl", - "commonvoice_hours": 118.0, + "commonvoice_hours": 119.0, "commonvoice_locale": "nl", "in_benchmark": true }, @@ -1376,7 +1376,7 @@ "family": "Turkic", "flores_path": "uig_Arab", "fleurs_tag": null, - "commonvoice_hours": 368.0, + "commonvoice_hours": 369.0, "commonvoice_locale": "ug", "in_benchmark": true }, @@ -3284,7 +3284,7 @@ "family": "Atlantic-Congo", "flores_path": null, "fleurs_tag": null, - "commonvoice_hours": 9.5, + "commonvoice_hours": 9.7, "commonvoice_locale": "bum", "in_benchmark": false }, @@ -3560,7 +3560,7 @@ "family": "Abkhaz-Adyge", "flores_path": null, "fleurs_tag": null, - "commonvoice_hours": 44.0, + "commonvoice_hours": 45.0, "commonvoice_locale": "kbd", "in_benchmark": false }, @@ -4976,7 +4976,7 @@ "family": "Indo-European", "flores_path": null, "fleurs_tag": null, - "commonvoice_hours": 4.8, + "commonvoice_hours": 5.8, "commonvoice_locale": "kvx", "in_benchmark": false }, @@ -5420,7 +5420,7 @@ "family": "Indo-European", "flores_path": null, "fleurs_tag": null, - "commonvoice_hours": 2.7, + "commonvoice_hours": 4.1, "commonvoice_locale": "kxp", "in_benchmark": false }, @@ -8182,6 +8182,17 @@ "license": "Llama3", "creation_date": "2024-04-17" }, + { + "id": "openai/gpt-4.1-mini", + "name": "GPT-4.1 Mini", + "provider_name": "OpenAI", + "cost": 1.6, + "hf_id": null, + "size": null, + "type": "Commercial", + "license": null, + "creation_date": "2025-04-14" + }, { "id": "openai/gpt-4.1-nano", "name": "GPT-4.1 Nano", @@ -8204,6 +8215,28 @@ "license": null, "creation_date": "2024-07-18" }, + { + "id": "openai/gpt-3.5-turbo-0613", + "name": "GPT-3.5 Turbo (older v0613)", + "provider_name": "OpenAI", + "cost": 2.0, + "hf_id": null, + "size": null, + "type": "Commercial", + "license": null, + "creation_date": "2024-01-25" + }, + { + "id": "openai/gpt-3.5-turbo", + "name": "GPT-3.5 Turbo", + "provider_name": "OpenAI", + "cost": 1.5, + "hf_id": null, + "size": null, + "type": "Commercial", + "license": null, + "creation_date": "2023-05-28" + }, { "id": "mistralai/mistral-small-3.1-24b-instruct", "name": "Mistral Small 3.1 24B", @@ -8215,6 +8248,28 @@ "license": "Apache 2.0", "creation_date": "2025-03-11" }, + { + "id": "mistralai/mistral-saba", + "name": "Saba", + "provider_name": "Mistral", + "cost": 0.6, + "hf_id": null, + "size": null, + "type": "Commercial", + "license": null, + "creation_date": "2025-02-17" + }, + { + "id": "mistralai/mistral-nemo", + "name": "Mistral Nemo", + "provider_name": "Mistral", + "cost": 0.07, + "hf_id": "mistralai/Mistral-Nemo-Instruct-2407", + "size": 12247782400.0, + "type": "Open", + "license": "Apache 2.0", + "creation_date": "2024-07-17" + }, { "id": "google/gemini-2.5-flash-preview", "name": "Gemini 2.5 Flash Preview", @@ -8226,6 +8281,17 @@ "license": null, "creation_date": "2025-04-17" }, + { + "id": "google/gemini-2.0-flash-lite-001", + "name": "Gemini 2.0 Flash Lite", + "provider_name": "Google", + "cost": 0.3, + "hf_id": null, + "size": null, + "type": "Commercial", + "license": null, + "creation_date": "2025-02-25" + }, { "id": "google/gemma-3-27b-it", "name": "Gemma 3 27B", @@ -8237,17 +8303,6 @@ "license": "Gemma", "creation_date": "2025-03-01" }, - { - "id": "qwen/qwq-32b", - "name": "QwQ 32B", - "provider_name": "Qwen", - "cost": 0.2, - "hf_id": "Qwen/QwQ-32B", - "size": 32763876352.0, - "type": "Open", - "license": "Apache 2.0", - "creation_date": "2025-03-05" - }, { "id": "deepseek/deepseek-chat-v3-0324", "name": "DeepSeek V3 0324", @@ -8259,6 +8314,28 @@ "license": "Mit", "creation_date": "2025-03-24" }, + { + "id": "deepseek/deepseek-chat", + "name": "DeepSeek V3", + "provider_name": "DeepSeek", + "cost": 0.89, + "hf_id": "deepseek-ai/DeepSeek-V3", + "size": 684531386000.0, + "type": "Open", + "license": "", + "creation_date": "2024-12-25" + }, + { + "id": "microsoft/phi-4", + "name": "Phi 4", + "provider_name": "Microsoft", + "cost": 0.14, + "hf_id": "microsoft/phi-4", + "size": 14659507200.0, + "type": "Open", + "license": "Mit", + "creation_date": "2024-12-11" + }, { "id": "microsoft/phi-4-multimodal-instruct", "name": "Phi 4 Multimodal Instruct", @@ -8347,6 +8424,22 @@ "score": 0.5835344719191324, "sentence_nr": 0 }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.4440401202498867, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.634367011980859, + "sentence_nr": 0 + }, { "model": "openai/gpt-4.1-nano", "bcp_47": "en", @@ -8379,6 +8472,38 @@ "score": 0.746881923400435, "sentence_nr": 0 }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.33931070790731876, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.5427607577375184, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.33100023927532657, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.5283894635872319, + "sentence_nr": 0 + }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", @@ -8395,6 +8520,38 @@ "score": 0.5880210095195896, "sentence_nr": 0 }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.49602824624416075, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.6821172236213218, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.2511517944602615, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.4484633445384819, + "sentence_nr": 0 + }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "en", @@ -8412,35 +8569,35 @@ "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.5617561349997696, + "score": 0.5749603738163459, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.7132694856647042, + "score": 0.7240488251574404, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.2963216580569375, + "score": 0.5617561349997696, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.5101500486835966, + "score": 0.7132694856647042, "sentence_nr": 0 }, { @@ -8459,6 +8616,38 @@ "score": 0.77785134764153, "sentence_nr": 0 }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.5812269906647429, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.7632349489047001, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.3604302813636731, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.5648482391867907, + "sentence_nr": 0 + }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", @@ -8555,6 +8744,22 @@ "score": 0.5246291817407542, "sentence_nr": 0 }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.3629639414033421, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.5313865724318225, + "sentence_nr": 0 + }, { "model": "openai/gpt-4.1-nano", "bcp_47": "zh", @@ -8587,6 +8792,38 @@ "score": 0.4968492831219663, "sentence_nr": 0 }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.3671596072089024, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.5953389856441371, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.3671596072089024, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.5953389856441371, + "sentence_nr": 0 + }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", @@ -8603,6 +8840,38 @@ "score": 0.5164808837319497, "sentence_nr": 0 }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.3276399373163712, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.5131503736358733, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.23343658187420896, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.5188968707275573, + "sentence_nr": 0 + }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "zh", @@ -8620,35 +8889,35 @@ "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.4273817965049865, + "score": 0.2596939072050362, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.6016204186733703, + "score": 0.4394574387008692, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0, + "score": 0.4273817965049865, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.0, + "score": 0.6016204186733703, "sentence_nr": 0 }, { @@ -8667,6 +8936,38 @@ "score": 0.5952617863931118, "sentence_nr": 0 }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.3930690372081822, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.6043335353470877, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.32374956399799487, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.5552439191255761, + "sentence_nr": 0 + }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", @@ -8763,12 +9064,28 @@ "score": 0.9463396364218181, "sentence_nr": 0 }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.8781548422306138, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.9156314785041992, + "sentence_nr": 0 + }, { "model": "openai/gpt-4.1-nano", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.8347630061919914, + "score": 0.8562379115188704, "sentence_nr": 0 }, { @@ -8776,7 +9093,7 @@ "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.90120785919445, + "score": 0.914880147320643, "sentence_nr": 0 }, { @@ -8795,6 +9112,38 @@ "score": 0.9958930217841712, "sentence_nr": 0 }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.9878765474230741, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.9958930217841712, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.9878765474230741, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.9958930217841712, + "sentence_nr": 0 + }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", @@ -8811,6 +9160,38 @@ "score": 0.9958930217841712, "sentence_nr": 0 }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.7086626689681226, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.8167266341909933, + "sentence_nr": 0 + }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "hi", @@ -8827,6 +9208,22 @@ "score": 0.9958930217841712, "sentence_nr": 0 }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.9878765474230741, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.9958930217841712, + "sentence_nr": 0 + }, { "model": "google/gemma-3-27b-it", "bcp_47": "hi", @@ -8844,23 +9241,23 @@ "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0, + "score": 0.9878765474230741, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.0, + "score": 0.9958930217841712, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", @@ -8868,13 +9265,29 @@ "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", "score": 0.9958930217841712, "sentence_nr": 0 }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.9577952806172931, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.9847675380468571, + "sentence_nr": 0 + }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "hi", @@ -8971,6 +9384,22 @@ "score": 0.4820043660869366, "sentence_nr": 0 }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.3207234978095439, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.4977201243943551, + "sentence_nr": 0 + }, { "model": "openai/gpt-4.1-nano", "bcp_47": "es", @@ -9003,6 +9432,38 @@ "score": 0.5651672709988255, "sentence_nr": 0 }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.20596702648945656, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.43080958860183666, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.2057575468480153, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.43186109332677985, + "sentence_nr": 0 + }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", @@ -9019,6 +9480,38 @@ "score": 0.49819657249183386, "sentence_nr": 0 }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.28925821076686087, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.5172170138888198, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.19539650323609742, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.43705356257534755, + "sentence_nr": 0 + }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "es", @@ -9036,35 +9529,35 @@ "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.441464946158803, + "score": 0.41019145639667887, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.6003092613714627, + "score": 0.5550063659734302, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.34734422615832194, + "score": 0.441464946158803, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.5262645092345396, + "score": 0.6003092613714627, "sentence_nr": 0 }, { @@ -9083,6 +9576,38 @@ "score": 0.5256417654956012, "sentence_nr": 0 }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.4338913033745421, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6021209112384719, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.2833191488478096, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.4778413511597435, + "sentence_nr": 0 + }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "es", @@ -9179,6 +9704,22 @@ "score": 0.5861956606716949, "sentence_nr": 0 }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.4354870567552493, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.6005516255183536, + "sentence_nr": 0 + }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ar", @@ -9211,6 +9752,38 @@ "score": 0.6493197366069867, "sentence_nr": 0 }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.3935566429569304, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.5739224928184342, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.4113107630914792, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.6024368113718255, + "sentence_nr": 0 + }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", @@ -9227,6 +9800,38 @@ "score": 0.5946895227088745, "sentence_nr": 0 }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.422553348837848, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.5840904424340781, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.24177360091172484, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.46999682982170327, + "sentence_nr": 0 + }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ar", @@ -9244,35 +9849,35 @@ "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.47840604738578085, + "score": 0.36819813355629544, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.6297473901472479, + "score": 0.5359192371665321, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0, + "score": 0.47840604738578085, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.0, + "score": 0.6297473901472479, "sentence_nr": 0 }, { @@ -9291,6 +9896,38 @@ "score": 0.6679481474132949, "sentence_nr": 0 }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.4535479195319991, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.6276625295274986, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.38827890681861416, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.5625611809933323, + "sentence_nr": 0 + }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ar", @@ -9387,6 +10024,22 @@ "score": 0.5666753970394321, "sentence_nr": 0 }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.40454825028023833, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.5951482608075656, + "sentence_nr": 0 + }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ur", @@ -9419,6 +10072,38 @@ "score": 0.5674650482249737, "sentence_nr": 0 }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.27292771734066346, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.5212657903012956, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.2736066918197224, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.514117258748181, + "sentence_nr": 0 + }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ur", @@ -9435,6 +10120,38 @@ "score": 0.4681164293806726, "sentence_nr": 0 }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.3464618221511632, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.5286087759223264, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.13337282396479677, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.4379609517006709, + "sentence_nr": 0 + }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ur", @@ -9452,35 +10169,35 @@ "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.3725907668893922, + "score": 0.3186774371729279, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.5838909337906717, + "score": 0.5594486931861122, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.1229583779881281, + "score": 0.3725907668893922, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.3267617054992069, + "score": 0.5838909337906717, "sentence_nr": 0 }, { @@ -9499,6 +10216,38 @@ "score": 0.6257813924169782, "sentence_nr": 0 }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.43058052532641716, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.5975873581943051, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.0006956037840845855, + "sentence_nr": 0 + }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ur", @@ -9595,6 +10344,22 @@ "score": 0.4540589962283635, "sentence_nr": 0 }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.344338817815182, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.534136089789678, + "sentence_nr": 0 + }, { "model": "openai/gpt-4.1-nano", "bcp_47": "fr", @@ -9627,6 +10392,38 @@ "score": 0.5426399702952437, "sentence_nr": 0 }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.24915993462248331, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4396481537491334, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.24192108083438388, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4175394768692251, + "sentence_nr": 0 + }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", @@ -9643,6 +10440,38 @@ "score": 0.4892722276483434, "sentence_nr": 0 }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.28677829776706215, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5017473465626182, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.253492999601661, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5066198263682749, + "sentence_nr": 0 + }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "fr", @@ -9659,6 +10488,22 @@ "score": 0.5978315904237319, "sentence_nr": 0 }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.3231223153895219, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5181020767289912, + "sentence_nr": 0 + }, { "model": "google/gemma-3-27b-it", "bcp_47": "fr", @@ -9676,23 +10521,23 @@ "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0, + "score": 0.28716979381420105, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.0, + "score": 0.5171262478660463, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", @@ -9700,11 +10545,27 @@ "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.5171262478660463, + "score": 0.5143386556316146, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.25970562893843707, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4734691139529827, "sentence_nr": 0 }, { @@ -9803,6 +10664,22 @@ "score": 0.5834523243646894, "sentence_nr": 0 }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.39387637515513285, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.5909459753994141, + "sentence_nr": 0 + }, { "model": "openai/gpt-4.1-nano", "bcp_47": "bn", @@ -9835,6 +10712,38 @@ "score": 0.5289420578289948, "sentence_nr": 0 }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.23037101973672552, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.4814028637155758, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.23922316498050586, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.44705223970825275, + "sentence_nr": 0 + }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", @@ -9851,6 +10760,38 @@ "score": 0.5861999156017297, "sentence_nr": 0 }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.406520102973592, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.5957668554688678, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.001982553528945282, + "sentence_nr": 0 + }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "bn", @@ -9868,35 +10809,35 @@ "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.4100880948326119, + "score": 0.38914560772865003, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.5748650910980349, + "score": 0.5725189583566723, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0, + "score": 0.4100880948326119, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.0, + "score": 0.5748650910980349, "sentence_nr": 0 }, { @@ -9915,6 +10856,38 @@ "score": 0.7074940030211, "sentence_nr": 0 }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.5215309394772263, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.6680522998193752, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.30181997280390516, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.49784375185665214, + "sentence_nr": 0 + }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "bn", @@ -10011,6 +10984,22 @@ "score": 0.5344680037267059, "sentence_nr": 0 }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.4015176110768588, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5570033941527084, + "sentence_nr": 0 + }, { "model": "openai/gpt-4.1-nano", "bcp_47": "pt", @@ -10043,6 +11032,38 @@ "score": 0.5956867226653717, "sentence_nr": 0 }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.2634713516356232, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.48530293133680297, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.27197487450984753, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.49912608327888125, + "sentence_nr": 0 + }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", @@ -10059,6 +11080,38 @@ "score": 0.5432217848942439, "sentence_nr": 0 }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.3940944668383663, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5436661645656922, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.18405105316121032, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.44249774237178774, + "sentence_nr": 0 + }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "pt", @@ -10076,35 +11129,35 @@ "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.4831574055451935, + "score": 0.4451909305935609, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.6238976883927624, + "score": 0.5879542667474525, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0, + "score": 0.4831574055451935, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.0, + "score": 0.6238976883927624, "sentence_nr": 0 }, { @@ -10123,6 +11176,38 @@ "score": 0.574382729364071, "sentence_nr": 0 }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.4069863197589215, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5748668902054292, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.32926293944513546, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5443707654946067, + "sentence_nr": 0 + }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pt", @@ -10219,6 +11304,22 @@ "score": 0.6547171931962555, "sentence_nr": 0 }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.5534262126390082, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.7517262392757741, + "sentence_nr": 0 + }, { "model": "openai/gpt-4.1-nano", "bcp_47": "pa", @@ -10251,6 +11352,38 @@ "score": 0.6515522498665886, "sentence_nr": 0 }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.42759222581369355, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.6292567445430434, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.40493924631497547, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.619667547537019, + "sentence_nr": 0 + }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pa", @@ -10267,6 +11400,38 @@ "score": 0.6533822343227146, "sentence_nr": 0 }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.4699943434621447, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.7050475306353289, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.36720541083887626, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.556299558101958, + "sentence_nr": 0 + }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "pa", @@ -10283,6 +11448,22 @@ "score": 0.6880668163480468, "sentence_nr": 0 }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.4800491211460759, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.7058154381442389, + "sentence_nr": 0 + }, { "model": "google/gemma-3-27b-it", "bcp_47": "pa", @@ -10300,23 +11481,23 @@ "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.05034135169161612, + "score": 0.45495679780282583, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.25001156386121903, + "score": 0.699735222419999, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", @@ -10324,13 +11505,29 @@ "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", "score": 0.699735222419999, "sentence_nr": 0 }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.4396453171706368, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.6386415226511408, + "sentence_nr": 0 + }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pa", @@ -10427,6 +11624,22 @@ "score": 0.5465517653500693, "sentence_nr": 0 }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.45096237059550953, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.6276175882237706, + "sentence_nr": 0 + }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ru", @@ -10459,6 +11672,38 @@ "score": 0.5716341952568125, "sentence_nr": 0 }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.39141668922302714, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.592604953206805, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.36488503551800366, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.5723243480372939, + "sentence_nr": 0 + }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ru", @@ -10475,6 +11720,38 @@ "score": 0.5410881356746259, "sentence_nr": 0 }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.41595343891606124, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.6228483474103623, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.3647006103716475, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.5752487475212197, + "sentence_nr": 0 + }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ru", @@ -10492,35 +11769,35 @@ "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.4611551555069207, + "score": 0.44751671242344965, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.6294324146720465, + "score": 0.6011479673559094, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.19685577478840446, + "score": 0.4611551555069207, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.4551749985589161, + "score": 0.6294324146720465, "sentence_nr": 0 }, { @@ -10539,6 +11816,38 @@ "score": 0.6246787832833863, "sentence_nr": 0 }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.4510663025172895, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.6439430694122988, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.28278985838873544, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.5364084166509312, + "sentence_nr": 0 + }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ru", @@ -10635,6 +11944,22 @@ "score": 0.4110516731487298, "sentence_nr": 0 }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.29705368433365786, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.5186257965942541, + "sentence_nr": 0 + }, { "model": "openai/gpt-4.1-nano", "bcp_47": "sw", @@ -10667,6 +11992,38 @@ "score": 0.5840165124966731, "sentence_nr": 0 }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.2667148788792994, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.5186154099951347, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.2667148788792994, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.5186154099951347, + "sentence_nr": 0 + }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "sw", @@ -10683,6 +12040,38 @@ "score": 0.5497542561082874, "sentence_nr": 0 }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.17509359925241916, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.08875218317184876, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.3759098347425247, + "sentence_nr": 0 + }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "sw", @@ -10700,35 +12089,35 @@ "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.4342485684315921, + "score": 0.4042355060018226, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.5862756549666985, + "score": 0.5567844610331302, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0, + "score": 0.4342485684315921, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.0, + "score": 0.5862756549666985, "sentence_nr": 0 }, { @@ -10747,6 +12136,38 @@ "score": 0.5234687470369108, "sentence_nr": 0 }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.3709295124087965, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.5443097614366986, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.3415769856419364, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.555003404573853, + "sentence_nr": 0 + }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "sw", @@ -10843,6 +12264,22 @@ "score": 0.6608358312257032, "sentence_nr": 0 }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.501322654905737, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.6749353853594616, + "sentence_nr": 0 + }, { "model": "openai/gpt-4.1-nano", "bcp_47": "id", @@ -10875,6 +12312,38 @@ "score": 0.7019407549121803, "sentence_nr": 0 }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.3422590336943533, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.5784088566089314, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.36382507187110075, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.5597199178753666, + "sentence_nr": 0 + }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "id", @@ -10891,6 +12360,38 @@ "score": 0.6283965584123504, "sentence_nr": 0 }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.4783304729406064, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.6570361507659167, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.31791964488124513, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.5391165577881754, + "sentence_nr": 0 + }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "id", @@ -10908,35 +12409,35 @@ "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.4544549777519972, + "score": 0.4870178700768381, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.6588011478075102, + "score": 0.6571420749199969, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0, + "score": 0.4544549777519972, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.0, + "score": 0.6588011478075102, "sentence_nr": 0 }, { @@ -10955,6 +12456,38 @@ "score": 0.6517469394467796, "sentence_nr": 0 }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.5253138252916816, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.713708032136132, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.36216295932014775, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.5817525999349799, + "sentence_nr": 0 + }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "id", @@ -11051,6 +12584,22 @@ "score": 0.6326418045965277, "sentence_nr": 0 }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.4307886337606128, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.5983383363156769, + "sentence_nr": 0 + }, { "model": "openai/gpt-4.1-nano", "bcp_47": "de", @@ -11083,6 +12632,38 @@ "score": 0.6756807439055712, "sentence_nr": 0 }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.42913085819666935, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.6230860315754777, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.42906677303047097, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.6259670174303734, + "sentence_nr": 0 + }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "de", @@ -11099,6 +12680,38 @@ "score": 0.681786235656136, "sentence_nr": 0 }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.5335035739559317, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.7040649389679675, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.3655292171929706, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.5818586474198358, + "sentence_nr": 0 + }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "de", @@ -11116,35 +12729,35 @@ "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.5043550869731553, + "score": 0.5416689002024996, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.6689462373151898, + "score": 0.7032606089138365, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.28630516999083483, + "score": 0.5043550869731553, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.4859658293338903, + "score": 0.6689462373151898, "sentence_nr": 0 }, { @@ -11163,6 +12776,38 @@ "score": 0.6651586361790265, "sentence_nr": 0 }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.524431720527746, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.7076771205887638, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.48840342566834705, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.6801218410948642, + "sentence_nr": 0 + }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "de", @@ -11259,6 +12904,22 @@ "score": 0.42969616197156246, "sentence_nr": 0 }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.23555001457110075, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.4328470748839584, + "sentence_nr": 0 + }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ja", @@ -11291,6 +12952,38 @@ "score": 0.4539275409654266, "sentence_nr": 0 }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.30081934992431447, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.5592925653127773, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.2871484146452821, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.5435042611894851, + "sentence_nr": 0 + }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ja", @@ -11307,6 +13000,38 @@ "score": 0.4522509933949415, "sentence_nr": 0 }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.27552516390390885, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.4769491782857059, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.2263758580444643, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.41910522124347077, + "sentence_nr": 0 + }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ja", @@ -11324,35 +13049,35 @@ "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.34777225435927045, + "score": 0.18988095581607925, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.5603739447290761, + "score": 0.41823143717308187, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.0343688963868873, + "score": 0.34777225435927045, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.2491787368704391, + "score": 0.5603739447290761, "sentence_nr": 0 }, { @@ -11371,6 +13096,38 @@ "score": 0.4700612059850866, "sentence_nr": 0 }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.2858464946739048, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.5073688691937708, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.19269477347765157, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.40684209227541773, + "sentence_nr": 0 + }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ja", @@ -11405,6627 +13162,7011 @@ }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.07407154448063642, - "sentence_nr": 1 + "score": 0.3897372020625521, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.43145434527321425, - "sentence_nr": 1 + "score": 0.5520780806464591, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.12903696060775005, - "sentence_nr": 1 + "score": 0.2153742037697241, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.456225988032654, - "sentence_nr": 1 + "score": 0.4581737688885401, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.22583314893598608, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.024459391267874976, - "sentence_nr": 1 + "score": 0.4634872123809323, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.12351824822447692, - "sentence_nr": 1 + "score": 0.2000682107464079, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.46822754470803873, - "sentence_nr": 1 + "score": 0.42213947952783815, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.2221644327559588, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.42764590771808364, + "sentence_nr": 0 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "en", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.17877584235677071, + "sentence_nr": 0 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "en", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.4463892736716987, - "sentence_nr": 1 + "score": 0.401518876879992, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.15815751066481462, - "sentence_nr": 1 + "score": 0.30519601919508343, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.5152611872266766, - "sentence_nr": 1 + "score": 0.48440897375540304, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.10957715528160569, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.4031456247133876, - "sentence_nr": 1 + "score": 0.3392444984825636, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.1583163321869008, - "sentence_nr": 1 + "score": 0.10964882554034484, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.5032511267062394, - "sentence_nr": 1 + "score": 0.3356389160000376, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.20669086265781264, - "sentence_nr": 1 + "score": 0.22347194598034506, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.5076721272198604, - "sentence_nr": 1 + "score": 0.4665650707763161, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "mistralai/mistral-saba", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.17630490037560695, - "sentence_nr": 1 + "score": 0.3016964931112797, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "mistralai/mistral-saba", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.48116430160978857, - "sentence_nr": 1 + "score": 0.4535427413316391, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "model": "mistralai/mistral-nemo", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.15611634095633747, - "sentence_nr": 1 + "score": 0.1911861015937895, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "model": "mistralai/mistral-nemo", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.5075814499747183, - "sentence_nr": 1 + "score": 0.41477093415943017, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4469358090476088, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.4122750002638689, - "sentence_nr": 1 + "score": 0.5872096766535113, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.15412719160788987, - "sentence_nr": 1 + "score": 0.287676448167452, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.5010353699512481, - "sentence_nr": 1 + "score": 0.4733049682606063, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.06647168102389285, - "sentence_nr": 1 + "score": 0.34059658886569716, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.34350832619898364, - "sentence_nr": 1 + "score": 0.5263364808620599, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.12560672881768975, - "sentence_nr": 1 + "score": 0.31145364701208733, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.4969560260291519, - "sentence_nr": 1 + "score": 0.5087911718200273, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.17077058518804336, - "sentence_nr": 1 + "score": 0.31145364701208733, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.5022008374701596, - "sentence_nr": 1 + "score": 0.5087911718200273, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "microsoft/phi-4", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.10784756064735967, - "sentence_nr": 1 + "score": 0.2636532817002844, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "microsoft/phi-4", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.4427230465401631, - "sentence_nr": 1 + "score": 0.46325989506424586, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.05534265631745826, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.39688946206212833, - "sentence_nr": 1 + "score": 0.004236700409670164, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "amazon/nova-micro-v1", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.12369892692249995, - "sentence_nr": 1 + "score": 0.3711271620335373, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "amazon/nova-micro-v1", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.44549610902403686, - "sentence_nr": 1 + "score": 0.5606811328336353, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.06656213940646748, - "sentence_nr": 1 + "score": 0.2775751476798985, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.38435741328258305, - "sentence_nr": 1 + "score": 0.5467407840471017, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3372953649368346, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.42954139521687473, - "sentence_nr": 1 + "score": 0.5482505380106469, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.175396614619324, - "sentence_nr": 1 + "score": 0.3598041249522345, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.49736499605529066, - "sentence_nr": 1 + "score": 0.5672133517600307, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.15154395847232716, - "sentence_nr": 1 + "score": 0.275788082902897, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.46053919348995803, - "sentence_nr": 1 + "score": 0.4682894376569175, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.1609675245202845, - "sentence_nr": 1 + "score": 0.2898775882071186, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.5069863833094232, - "sentence_nr": 1 + "score": 0.5097463471921362, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.2883603713983444, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.4041678259311437, - "sentence_nr": 1 + "score": 0.508309417496147, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.1290514243115152, - "sentence_nr": 1 + "score": 0.34655442187135127, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.4766581477336301, - "sentence_nr": 1 + "score": 0.6023567722379627, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.09735981717515908, - "sentence_nr": 1 + "score": 0.2740037448125678, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.35288934658906385, - "sentence_nr": 1 + "score": 0.4683631907729274, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.08273178236238297, - "sentence_nr": 1 + "score": 0.2740037448125678, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.36399666460809255, - "sentence_nr": 1 + "score": 0.4683631907729274, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.13012870333257068, - "sentence_nr": 1 + "score": 0.25810978038865107, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.3852835519852091, - "sentence_nr": 1 + "score": 0.5114862976334219, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "model": "mistralai/mistral-saba", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.40268767791422966, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "model": "mistralai/mistral-saba", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.3356633416447032, - "sentence_nr": 1 + "score": 0.6359049000877419, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.09553543457519309, - "sentence_nr": 1 + "score": 0.28143597386296987, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.3531525294256142, - "sentence_nr": 1 + "score": 0.46833203769304566, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.12422788549118892, - "sentence_nr": 1 + "score": 0.08994455977452934, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.40222210564426, - "sentence_nr": 1 + "score": 0.2887022044225924, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3766776485541237, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.28789057461471257, - "sentence_nr": 1 + "score": 0.5833210829022488, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.1568418931847707, - "sentence_nr": 1 + "score": 0.46443675322751826, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.41485110412488607, - "sentence_nr": 1 + "score": 0.6512173868183774, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.10467757347424328, - "sentence_nr": 1 + "score": 0.4244950970711203, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.36749853206282146, - "sentence_nr": 1 + "score": 0.6318443095842109, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.0952569581727979, - "sentence_nr": 1 + "score": 0.4244950970711203, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.38264808953110185, - "sentence_nr": 1 + "score": 0.6505130874640082, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "model": "microsoft/phi-4", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.13026649757585426, - "sentence_nr": 1 + "score": 0.0922665949962545, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "model": "microsoft/phi-4", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.41550755035304077, - "sentence_nr": 1 + "score": 0.28952892991876095, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.039782861678265974, - "sentence_nr": 1 + "score": 0.28612208859224425, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.1175904695048123, - "sentence_nr": 1 + "score": 0.20955561269928308, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.3996881234028031, - "sentence_nr": 1 + "score": 0.451252540938088, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.11564012893219777, - "sentence_nr": 1 + "score": 0.2988083057066004, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.44599783682350064, - "sentence_nr": 1 + "score": 0.4973008562914265, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.12601482779921785, - "sentence_nr": 1 + "score": 0.28528905353056333, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.43595665254608706, - "sentence_nr": 1 + "score": 0.4885812318466243, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.12022286401047096, - "sentence_nr": 1 + "score": 0.35797362976091973, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.48279986805368713, - "sentence_nr": 1 + "score": 0.5379266632230616, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.15350377490367967, - "sentence_nr": 1 + "score": 0.2606045000988204, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.47645148444499064, - "sentence_nr": 1 + "score": 0.4525313379099324, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.3739173814408067, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.5300063372835349, + "sentence_nr": 0 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "es", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3626791619232025, + "sentence_nr": 0 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "es", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.4042977714165801, - "sentence_nr": 1 + "score": 0.539836963539224, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.13714845589364738, - "sentence_nr": 1 + "score": 0.2453787991485662, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.45499281593451946, - "sentence_nr": 1 + "score": 0.4099668999237371, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.1426882822674975, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.38785209659947417, - "sentence_nr": 1 + "score": 0.34960972004472946, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.43122763125947844, - "sentence_nr": 1 + "score": 0.19360049306902116, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.12848168928706002, - "sentence_nr": 1 + "score": 0.32155751243171055, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.4421263683867116, - "sentence_nr": 1 + "score": 0.5282954234137397, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", + "model": "mistralai/mistral-saba", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.18629760071299903, - "sentence_nr": 1 + "score": 0.29762732722843216, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", + "model": "mistralai/mistral-saba", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.4381418376415505, - "sentence_nr": 1 + "score": 0.4476081666669917, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.09198045184317984, - "sentence_nr": 1 + "score": 0.23545988330717435, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.4598393646838097, - "sentence_nr": 1 + "score": 0.40601705732690246, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.07798530247118374, - "sentence_nr": 1 + "score": 0.5051170491729706, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.4006113700211268, - "sentence_nr": 1 + "score": 0.6566114460416413, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.14541923959059266, - "sentence_nr": 1 + "score": 0.35093163714125025, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.47577612932999147, - "sentence_nr": 1 + "score": 0.5161806517116977, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4187059279293422, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.2521233582161207, - "sentence_nr": 1 + "score": 0.5806611969643932, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.40959087443621306, - "sentence_nr": 1 + "score": 0.42286596174824126, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.6348509381122925, - "sentence_nr": 1 + "score": 0.5934357258501683, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.40959087443621306, - "sentence_nr": 1 + "score": 0.42071958648966074, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.6348509381122925, - "sentence_nr": 1 + "score": 0.5867954120500313, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "model": "microsoft/phi-4", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.3182970443542658, - "sentence_nr": 1 + "score": 0.28176232344609076, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "model": "microsoft/phi-4", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.5953162569846108, - "sentence_nr": 1 + "score": 0.46342775156724203, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.22238763863547817, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.5765887803460186, - "sentence_nr": 1 + "score": 0.27694098293799824, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.3026566818840519, - "sentence_nr": 1 + "score": 0.36560991595112396, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.5945859352092411, - "sentence_nr": 1 + "score": 0.537072365457506, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.18816868192268246, - "sentence_nr": 1 + "score": 0.35120509380099896, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.5179253053631742, - "sentence_nr": 1 + "score": 0.5127991322787522, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.23522101642407195, - "sentence_nr": 1 + "score": 0.2935204022158406, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.5302138314227511, - "sentence_nr": 1 + "score": 0.4867597973247361, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.09629060614977814, - "sentence_nr": 1 + "score": 0.36394690002866714, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.43565498999747165, - "sentence_nr": 1 + "score": 0.5567484827579814, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.40959087443621306, - "sentence_nr": 1 + "score": 0.278704088378991, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.6348509381122925, - "sentence_nr": 1 + "score": 0.4868935860000992, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.28418123342684043, - "sentence_nr": 1 + "score": 0.30364957397426867, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.539816402671069, - "sentence_nr": 1 + "score": 0.490461471023638, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.3765959322920135, - "sentence_nr": 1 + "score": 0.26416975244160523, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.6295826606382191, - "sentence_nr": 1 + "score": 0.4438864952532326, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.40801269202545287, - "sentence_nr": 1 + "score": 0.3091536050099401, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.6210533025653295, - "sentence_nr": 1 + "score": 0.504963808447426, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.0867932999243575, - "sentence_nr": 1 + "score": 0.2377283063350729, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.4201964133235075, - "sentence_nr": 1 + "score": 0.4229894191312214, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.08214106568089705, - "sentence_nr": 1 + "score": 0.2595156374159681, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.3969463877642616, - "sentence_nr": 1 + "score": 0.44692374238325255, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.1897299381066278, - "sentence_nr": 1 + "score": 0.28644027312006637, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.5086851537953713, - "sentence_nr": 1 + "score": 0.4643839364819269, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "model": "mistralai/mistral-saba", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3169600089456638, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "model": "mistralai/mistral-saba", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.3833939462124923, - "sentence_nr": 1 + "score": 0.49269567715798124, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.06036796843527163, - "sentence_nr": 1 + "score": 0.1905285416851139, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.37581426760977427, - "sentence_nr": 1 + "score": 0.4177320592079303, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.07793031063789554, - "sentence_nr": 1 + "score": 0.547669734463399, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.3700181221537743, - "sentence_nr": 1 + "score": 0.6920019082878786, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.39070481609556534, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.2718653389257641, - "sentence_nr": 1 + "score": 0.53793582138906, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.39892980454447485, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.4551974335055929, - "sentence_nr": 1 + "score": 0.5839375286411709, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.23119533406164058, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.3327209336079636, - "sentence_nr": 1 + "score": 0.4549622022003173, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.1804000267306113, - "sentence_nr": 1 + "score": 0.47187011152614905, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.451798442226037, - "sentence_nr": 1 + "score": 0.634570712438076, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "model": "microsoft/phi-4", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.1777835117834348, - "sentence_nr": 1 + "score": 0.26924245665759383, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "model": "microsoft/phi-4", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.5166806073547074, - "sentence_nr": 1 + "score": 0.45760881141869997, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.1501956901694662, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.009396473650937872, - "sentence_nr": 1 + "score": 0.3435352939078531, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.13582906387565688, - "sentence_nr": 1 + "score": 0.30692705311222085, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.43344913217266734, - "sentence_nr": 1 + "score": 0.5531231299653412, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.49132868804528823, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.3630576975795868, - "sentence_nr": 1 + "score": 0.6524450166860349, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.0744904632040495, - "sentence_nr": 1 + "score": 0.2929684584911775, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.4111163205685468, - "sentence_nr": 1 + "score": 0.5038324436049059, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3414171640083141, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.4363130300030932, - "sentence_nr": 1 + "score": 0.5306256202657124, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3858101625283812, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.41747276065817185, - "sentence_nr": 1 + "score": 0.5618844078335644, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.3718731060476833, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.507920995618425, + "sentence_nr": 0 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3718474296835603, + "sentence_nr": 0 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.401865675252717, - "sentence_nr": 1 + "score": 0.5563193852397018, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.41613344165345995, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.41649654108052436, - "sentence_nr": 1 + "score": 0.5740077532098984, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.07749370908741021, - "sentence_nr": 1 + "score": 0.23015479707279926, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.3853293582383978, - "sentence_nr": 1 + "score": 0.4043490475674803, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.08025555322499443, - "sentence_nr": 1 + "score": 0.17629023372542696, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.40116594181297777, - "sentence_nr": 1 + "score": 0.3473939681613876, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3116287423376191, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.43622390508229153, - "sentence_nr": 1 + "score": 0.4893092447918963, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", + "model": "mistralai/mistral-saba", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.13343258247486778, - "sentence_nr": 1 + "score": 0.3761474539770847, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", + "model": "mistralai/mistral-saba", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.4018842345370629, - "sentence_nr": 1 + "score": 0.5536622992238994, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.2459980185879811, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.4250905063113662, - "sentence_nr": 1 + "score": 0.47485821292608255, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4196211376031888, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.36347800793516216, - "sentence_nr": 1 + "score": 0.5915995690152394, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.06254678076846341, - "sentence_nr": 1 + "score": 0.3785127547760706, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.3887428577633272, - "sentence_nr": 1 + "score": 0.5548613310192737, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.08616711094288851, - "sentence_nr": 1 + "score": 0.40983351958195835, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.3696512763473903, - "sentence_nr": 1 + "score": 0.5980339788644404, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.12894104034845807, - "sentence_nr": 1 + "score": 0.40840960406849836, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.4486368934849452, - "sentence_nr": 1 + "score": 0.5662632887734669, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.08825252192863794, - "sentence_nr": 1 + "score": 0.40772967686463607, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.4377853721520782, - "sentence_nr": 1 + "score": 0.5681891080404556, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "model": "microsoft/phi-4", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.05345137572833361, - "sentence_nr": 1 + "score": 0.3627861053524045, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "model": "microsoft/phi-4", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.3829169125379508, - "sentence_nr": 1 + "score": 0.524896354547667, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.047201037160775325, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.3404791678264965, - "sentence_nr": 1 + "score": 0.12237706077959995, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.08767210132815903, - "sentence_nr": 1 + "score": 0.40939284504147777, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.40476518002703893, - "sentence_nr": 1 + "score": 0.5645111896180985, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.05422898988559086, - "sentence_nr": 1 + "score": 0.27106784138456536, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.335890201952113, - "sentence_nr": 1 + "score": 0.458287745564531, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.08465714266003518, - "sentence_nr": 1 + "score": 0.4034224234291925, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.41240280272579144, - "sentence_nr": 1 + "score": 0.5736798834726872, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.47117590712234436, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.34617921188455225, - "sentence_nr": 1 + "score": 0.6157183058759933, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.0588222649477664, - "sentence_nr": 1 + "score": 0.4840329060094462, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.3642771871011383, - "sentence_nr": 1 + "score": 0.6570537611908611, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.10186730973904586, - "sentence_nr": 1 + "score": 0.5211295957876436, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.43665642120840553, - "sentence_nr": 1 + "score": 0.6543358670735736, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.08248974616169381, - "sentence_nr": 1 + "score": 0.3844723738020861, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.40456777770242314, - "sentence_nr": 1 + "score": 0.5612506236862517, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.10496714075880566, - "sentence_nr": 1 + "score": 0.49428478171113605, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.4262440114275301, - "sentence_nr": 1 + "score": 0.6360862650323953, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.15386029327005746, - "sentence_nr": 1 + "score": 0.30384210838236353, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.43911482594829104, - "sentence_nr": 1 + "score": 0.517369863710813, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.10070927557742705, - "sentence_nr": 1 + "score": 0.30384210838236353, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.43718220262892105, - "sentence_nr": 1 + "score": 0.517369863710813, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.11478463129234825, - "sentence_nr": 1 + "score": 0.33677049851999397, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.4651957501593415, - "sentence_nr": 1 + "score": 0.5762640586619034, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-saba", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.07137101582673294, - "sentence_nr": 1 + "score": 0.4134997467039312, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-saba", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.4075406301092705, - "sentence_nr": 1 + "score": 0.5981100031486645, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.09107675218561961, - "sentence_nr": 1 + "score": 0.19080206844973818, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.4297036775694859, - "sentence_nr": 1 + "score": 0.4451297657136166, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.14738500064905094, - "sentence_nr": 1 + "score": 0.6611893921112539, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.4659728395318289, - "sentence_nr": 1 + "score": 0.7385527846626787, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.0643329477522681, - "sentence_nr": 1 + "score": 0.5472843613586278, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.3960585990192623, - "sentence_nr": 1 + "score": 0.6462543667959997, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.1262345212021199, - "sentence_nr": 1 + "score": 0.48936688255103167, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.4305259421555756, - "sentence_nr": 1 + "score": 0.6232884959088987, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.17247941414020762, - "sentence_nr": 1 + "score": 0.49864013450084044, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.48320144379865687, - "sentence_nr": 1 + "score": 0.6473028953530363, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.057981164297440296, - "sentence_nr": 1 + "score": 0.4705190878543854, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.33896784137459673, - "sentence_nr": 1 + "score": 0.6348294479712023, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "model": "microsoft/phi-4", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.09751270821852938, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "model": "microsoft/phi-4", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.395617758442078, - "sentence_nr": 1 + "score": 0.0006449948400412797, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.06301432444316532, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.4249725532507508, - "sentence_nr": 1 + "score": 0.15688445463098402, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.18248753930464637, - "sentence_nr": 1 + "score": 0.4711705838157902, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.4759830743101189, - "sentence_nr": 1 + "score": 0.6028678286611538, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.2822871796543221, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.1946966569103724, - "sentence_nr": 1 + "score": 0.5208915029538709, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.0772718393063023, - "sentence_nr": 1 + "score": 0.1077205146963877, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.4203683137304257, - "sentence_nr": 1 + "score": 0.428338145564396, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.08291357159799752, - "sentence_nr": 1 + "score": 0.09543667505391068, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.4009694996956877, - "sentence_nr": 1 + "score": 0.40518998504409354, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.14087022592589463, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.3714280466838255, - "sentence_nr": 1 + "score": 0.42752370954120755, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.10639077953820089, - "sentence_nr": 1 + "score": 0.3077490672091579, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.44452351549730684, - "sentence_nr": 1 + "score": 0.4982442692170086, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.16660766718834744, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.3370100422576744, - "sentence_nr": 1 + "score": 0.43821636698725164, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3109058809229358, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.3538602132402044, - "sentence_nr": 1 + "score": 0.5045951829816013, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.09058017202418268, - "sentence_nr": 1 + "score": 0.31213647387601523, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.45761691752578293, - "sentence_nr": 1 + "score": 0.556395949945723, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.167672929900467, - "sentence_nr": 1 + "score": 0.31213647387601523, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.46910779766306765, - "sentence_nr": 1 + "score": 0.556395949945723, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.36576182289875453, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.32123020755377657, - "sentence_nr": 1 + "score": 0.5569403582137159, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", + "model": "mistralai/mistral-saba", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.2987390399566193, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", + "model": "mistralai/mistral-saba", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.47435308668900444, - "sentence_nr": 1 + "score": 0.49080609910208733, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.08351211898903935, - "sentence_nr": 1 + "score": 0.12121103811203453, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.33030812447506436, - "sentence_nr": 1 + "score": 0.36654749330221426, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.07528927678469202, - "sentence_nr": 1 + "score": 0.4872311075347288, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.422513417362817, - "sentence_nr": 1 + "score": 0.6623535404713097, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.06126604215610123, - "sentence_nr": 1 + "score": 0.2757545333222742, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.3837677428398438, - "sentence_nr": 1 + "score": 0.4738441342217875, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.0756907193511249, - "sentence_nr": 1 + "score": 0.28983869034423043, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.4138725093679467, - "sentence_nr": 1 + "score": 0.4975732770770436, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.08866637424249016, - "sentence_nr": 1 + "score": 0.28060741458208943, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.44876462229383973, - "sentence_nr": 1 + "score": 0.496580338229036, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.10574428430204418, - "sentence_nr": 1 + "score": 0.3681900464793093, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.45371814600333005, - "sentence_nr": 1 + "score": 0.5504673899427779, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", + "model": "microsoft/phi-4", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.22107379072983208, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", + "model": "microsoft/phi-4", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.4351068027997583, - "sentence_nr": 1 + "score": 0.4472044729711229, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.09084091756463074, - "sentence_nr": 1 + "score": 0.07383026958055552, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.4286741659142759, - "sentence_nr": 1 + "score": 0.18582113429299857, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.32814442346427775, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.3980589439671235, - "sentence_nr": 1 + "score": 0.5149345446415335, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4102082155233312, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.4357976697626214, - "sentence_nr": 1 + "score": 0.5474039587505726, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.12649672885841734, - "sentence_nr": 1 + "score": 0.22327767951697297, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.41329609863930566, - "sentence_nr": 1 + "score": 0.4063556880747369, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.07465265387221826, - "sentence_nr": 1 + "score": 0.26234851988380015, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.4274000630396105, - "sentence_nr": 1 + "score": 0.4686295191568941, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.1219449069656942, - "sentence_nr": 1 + "score": 0.22472032138500259, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.4776943038671049, - "sentence_nr": 1 + "score": 0.4363253004030211, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.26999626626742373, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.4606409590817001, - "sentence_nr": 1 + "score": 0.4462786481475064, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.07996209785853586, - "sentence_nr": 1 + "score": 0.1523726019659672, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.4279137012019699, - "sentence_nr": 1 + "score": 0.4249945948121055, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.1195053737774238, - "sentence_nr": 1 + "score": 0.2556346494160282, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.4512136289975786, - "sentence_nr": 1 + "score": 0.4538035440310274, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.21748353646757182, - "sentence_nr": 1 + "score": 0.22094141139283613, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.4462746462826943, - "sentence_nr": 1 + "score": 0.4436055020535846, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.24443999371485628, - "sentence_nr": 1 + "score": 0.22094141139283613, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.4991016392840656, - "sentence_nr": 1 + "score": 0.4436055020535846, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.14283509516492696, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.34155562837143877, - "sentence_nr": 1 + "score": 0.39080133039424786, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", + "model": "mistralai/mistral-saba", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.07977475994840084, - "sentence_nr": 1 + "score": 0.2671559406638062, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", + "model": "mistralai/mistral-saba", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.41362647590171586, - "sentence_nr": 1 + "score": 0.4577424762656183, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.16764957347186446, - "sentence_nr": 1 + "score": 0.002106149957877001, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.43636719577147937, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.1139393935967296, - "sentence_nr": 1 + "score": 0.5525891004904828, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.12405768590988119, - "sentence_nr": 1 + "score": 0.2512606392489611, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.40898021337964235, - "sentence_nr": 1 + "score": 0.4210700546502019, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "sw", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.1701935252826955, - "sentence_nr": 1 + "score": 0.3284145915841146, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "sw", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.4455315745640286, - "sentence_nr": 1 + "score": 0.4777770768413136, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.11941817189528041, - "sentence_nr": 1 + "score": 0.32565974985390567, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.4275071634813637, - "sentence_nr": 1 + "score": 0.5028101514284876, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.08932983819566953, - "sentence_nr": 1 + "score": 0.2993726298551646, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.412238728569517, - "sentence_nr": 1 + "score": 0.4773135617112927, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.2507373945601762, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.45285904278083683, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.35698569920901285, - "sentence_nr": 1 + "score": 0.29187222866434104, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "sw", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.1275291133503835, - "sentence_nr": 1 + "score": 0.20649207653440943, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "sw", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.4127884601900206, - "sentence_nr": 1 + "score": 0.45559573554011507, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.22424453668984448, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.3538966478758119, - "sentence_nr": 1 + "score": 0.41637444107955873, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.2572733200413211, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.4179644538349004, - "sentence_nr": 1 + "score": 0.4520014138562526, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.11116091368823534, - "sentence_nr": 1 + "score": 0.23112644289004342, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.43307401079748475, - "sentence_nr": 1 + "score": 0.4228120750850924, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.07649978886725356, - "sentence_nr": 1 + "score": 0.1733396766438206, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.41031664319131844, - "sentence_nr": 1 + "score": 0.37157614360073693, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.27884029427402307, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.4808977586027471, + "sentence_nr": 0 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "id", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.2472606041939042, + "sentence_nr": 0 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "id", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.3867446668969619, - "sentence_nr": 1 + "score": 0.45364472696939645, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "id", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.2392120773016637, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "id", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.4331286519146886, - "sentence_nr": 1 + "score": 0.440445343487272, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.27477175237607115, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.2626949949898101, - "sentence_nr": 1 + "score": 0.4670076076385746, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.21038351069657651, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.4345822107320723, - "sentence_nr": 1 + "score": 0.4170983383308206, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3070946890889356, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.3795844422349344, - "sentence_nr": 1 + "score": 0.5150001444865586, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", + "model": "mistralai/mistral-saba", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.2393813524637459, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", + "model": "mistralai/mistral-saba", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.3964061846611735, - "sentence_nr": 1 + "score": 0.42382875320112184, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.07012053105310272, - "sentence_nr": 1 + "score": 0.18439115006412007, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.3123716745719453, - "sentence_nr": 1 + "score": 0.3881716916328596, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4484493594130144, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.3856352748003268, - "sentence_nr": 1 + "score": 0.6392061930349952, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.0876062628502436, - "sentence_nr": 1 + "score": 0.29277567814514704, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.3978552283854932, - "sentence_nr": 1 + "score": 0.47534798335753625, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.34077616827498786, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.43759229210123524, - "sentence_nr": 1 + "score": 0.5278331664063162, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.10505106462290037, - "sentence_nr": 1 + "score": 0.31260047665100127, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.4474870048911137, - "sentence_nr": 1 + "score": 0.48667320069984316, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.11534976570369744, - "sentence_nr": 1 + "score": 0.3024740132924786, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.46761329904761845, - "sentence_nr": 1 + "score": 0.4860481290671275, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.0487561532099542, - "sentence_nr": 1 + "score": 0.21472597489801595, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.3938156291645021, - "sentence_nr": 1 + "score": 0.4190785642335582, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.05624172669013078, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.4423346652606821, - "sentence_nr": 1 + "score": 0.3317185957958262, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.11378204941109882, - "sentence_nr": 1 + "score": 0.2624355454690498, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.4981472095171313, - "sentence_nr": 1 + "score": 0.48887901649623144, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.40165053057541866, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.4098374118843212, - "sentence_nr": 1 + "score": 0.5837756195280097, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.40311197004738203, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.4537465621062763, - "sentence_nr": 1 + "score": 0.5788525108956781, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.11335203496873462, - "sentence_nr": 1 + "score": 0.30912713581280643, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.46528080200591054, - "sentence_nr": 1 + "score": 0.5376618148723133, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 1 + "score": 0.22800071662764984, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4763467106828393, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.108829546976023, - "sentence_nr": 1 + "score": 0.42740890037960316, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.4177339268402449, - "sentence_nr": 1 + "score": 0.5996769756257465, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.32407268851096743, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.008776218574747889, - "sentence_nr": 1 + "score": 0.5123210866097154, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.09026606980896171, - "sentence_nr": 1 + "score": 0.40487199173556226, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.4602880143145438, - "sentence_nr": 1 + "score": 0.5637204315528265, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.19864358811190305, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.4239838444198129, - "sentence_nr": 1 + "score": 0.4520034758289839, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.28515736078765247, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.0009218289085545725, - "sentence_nr": 1 + "score": 0.5144923375933018, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.14883746844067872, - "sentence_nr": 1 + "score": 0.2997057270104923, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.4476843235219058, - "sentence_nr": 1 + "score": 0.5028660357670663, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", + "model": "mistralai/mistral-saba", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.07999819990926477, - "sentence_nr": 1 + "score": 0.35016444607906333, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", + "model": "mistralai/mistral-saba", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.3889987132692464, - "sentence_nr": 1 + "score": 0.5318969359864418, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", + "model": "mistralai/mistral-nemo", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.06058139791899572, - "sentence_nr": 1 + "score": 0.25497731326689277, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", + "model": "mistralai/mistral-nemo", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.4408463415099943, - "sentence_nr": 1 + "score": 0.5321857015823233, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.1059352062327485, - "sentence_nr": 1 + "score": 0.5138735427755269, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.4291550754056065, - "sentence_nr": 1 + "score": 0.6711767933025353, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.48917248258655954, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.3916082207331212, - "sentence_nr": 1 + "score": 0.6007992381101284, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", + "model": "google/gemma-3-27b-it", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.08841818041776058, - "sentence_nr": 1 + "score": 0.45074681913051867, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", + "model": "google/gemma-3-27b-it", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.47772386173878106, - "sentence_nr": 1 + "score": 0.5985843659278748, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.09171389226334559, - "sentence_nr": 1 + "score": 0.3442651325185116, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.47356776940569145, - "sentence_nr": 1 + "score": 0.534774838547693, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", + "model": "deepseek/deepseek-chat", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.16136315230667173, - "sentence_nr": 1 + "score": 0.37779254144801305, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", + "model": "deepseek/deepseek-chat", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.48202529715173736, - "sentence_nr": 1 + "score": 0.5639615032285982, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", + "model": "microsoft/phi-4", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3411216349302383, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", + "model": "microsoft/phi-4", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.4750212573397775, - "sentence_nr": 1 + "score": 0.524533707137854, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.003607064963668313, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.09577479457615844, - "sentence_nr": 1 + "score": 0.13496104417905996, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ja", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.11161133657801552, - "sentence_nr": 1 + "score": 0.36516261117337495, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ja", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.4277891734340718, - "sentence_nr": 1 + "score": 0.5425947356911068, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.3961285597009415, - "sentence_nr": 2 + "score": 0.49872195941208947, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.6148751441350505, - "sentence_nr": 2 + "score": 0.6907054265096231, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.4923751299732868, - "sentence_nr": 2 + "score": 0.45313578977486535, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.6853756490381199, - "sentence_nr": 2 + "score": 0.6160993561903745, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.3996712647649035, - "sentence_nr": 2 + "score": 0.37640646218183, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.6353525755760105, - "sentence_nr": 2 + "score": 0.5429063669356702, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.5115346945020283, - "sentence_nr": 2 + "score": 0.4331983607416391, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.7037574715738644, - "sentence_nr": 2 + "score": 0.601662300924314, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.49310399113262143, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.6618877666161819, + "sentence_nr": 0 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "en", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.44323526910431466, - "sentence_nr": 2 + "score": 0.46511244412293995, + "sentence_nr": 0 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "en", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.7203175886481126, - "sentence_nr": 2 + "score": 0.6535569144390859, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.38870674200492367, - "sentence_nr": 2 + "score": 0.5108628809804742, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.6484380084879691, - "sentence_nr": 2 + "score": 0.6929396211173784, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.24715873794308874, - "sentence_nr": 2 + "score": 0.41336688870747906, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.49051792813181655, - "sentence_nr": 2 + "score": 0.5851805477143694, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.005449161724399305, - "sentence_nr": 2 + "score": 0.38942582033344114, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.026158029267484995, - "sentence_nr": 2 + "score": 0.5648719586230837, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.24508104771894088, - "sentence_nr": 2 + "score": 0.4485994475252126, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.5725552336126134, - "sentence_nr": 2 + "score": 0.6258984728025891, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "mistralai/mistral-saba", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.543805702633007, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "mistralai/mistral-saba", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.7108467686156834, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.33608213382072566, - "sentence_nr": 2 + "score": 0.27230544563000586, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.6155314069125684, - "sentence_nr": 2 + "score": 0.511482380897768, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.20801258614305904, - "sentence_nr": 2 + "score": 0.5629805730671188, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.26703508536995574, - "sentence_nr": 2 + "score": 0.7184729617045537, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.35315040956049437, - "sentence_nr": 2 + "score": 0.5392691323275658, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.625895188503691, - "sentence_nr": 2 + "score": 0.6950593236840004, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.17374951565433233, - "sentence_nr": 2 + "score": 0.5248291448433852, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.45325597884524305, - "sentence_nr": 2 + "score": 0.6920934053021797, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.17743299460161885, - "sentence_nr": 2 + "score": 0.425742897803471, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.43071271897416463, - "sentence_nr": 2 + "score": 0.6292455373063424, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.16052654068024738, - "sentence_nr": 2 + "score": 0.5098155570066008, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.41580120868053494, - "sentence_nr": 2 + "score": 0.6842801799859595, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "microsoft/phi-4", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.05963579607071745, - "sentence_nr": 2 + "score": 0.410248141700974, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "microsoft/phi-4", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.31139762378406344, - "sentence_nr": 2 + "score": 0.6198489533513304, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.06724888422961112, - "sentence_nr": 2 + "score": 0.0072148746031117554, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.39667480605700844, - "sentence_nr": 2 + "score": 0.09496152255049971, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.11133996756497437, - "sentence_nr": 2 + "score": 0.4803498024083505, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.4410280353998367, - "sentence_nr": 2 + "score": 0.5963395991179793, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.11346446511593337, - "sentence_nr": 2 + "score": 0.3411271681324882, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.3675317022605926, - "sentence_nr": 2 + "score": 0.5323123267352375, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.2377604053257556, - "sentence_nr": 2 + "score": 0.2651736858432996, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.5690390533910819, - "sentence_nr": 2 + "score": 0.4491383344282561, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.10742716472890976, - "sentence_nr": 2 + "score": 0.3082082660061424, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.42694859148910824, - "sentence_nr": 2 + "score": 0.5077609645706764, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.32151064813737534, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.5058205933378546, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.19073363590503933, - "sentence_nr": 2 + "score": 0.3292501961779507, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.49895382941569383, - "sentence_nr": 2 + "score": 0.5076613847693546, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.31723554000172904, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.5055205673212141, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.14745870033404418, - "sentence_nr": 2 + "score": 0.3335262554878992, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.475170637938921, - "sentence_nr": 2 + "score": 0.5258955094447381, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.26801022984888695, - "sentence_nr": 2 + "score": 0.3516912179205685, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.5654883864995515, - "sentence_nr": 2 + "score": 0.5464920089233378, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.21665407194210906, - "sentence_nr": 2 + "score": 0.34757716262783495, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.4344921442639243, - "sentence_nr": 2 + "score": 0.5456290989918442, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.2735429726790281, - "sentence_nr": 2 + "score": 0.30384210838236353, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.5644723203818537, - "sentence_nr": 2 + "score": 0.49573162353290035, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "model": "mistralai/mistral-saba", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.20223322445648179, - "sentence_nr": 2 + "score": 0.34046691946402124, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "model": "mistralai/mistral-saba", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.5084057058209687, - "sentence_nr": 2 + "score": 0.529497944319035, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.2868985878682555, - "sentence_nr": 2 + "score": 0.20857446308936162, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.591501744009396, - "sentence_nr": 2 + "score": 0.46466122673467003, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.26459538953931094, - "sentence_nr": 2 + "score": 0.4213151123885409, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.5272178908335121, - "sentence_nr": 2 + "score": 0.6348713285352305, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.012201453805310429, - "sentence_nr": 2 + "score": 0.3215895303258089, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.063050817196087, - "sentence_nr": 2 + "score": 0.49179447692232336, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.24342570806900707, - "sentence_nr": 2 + "score": 0.3007845437586152, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.5341782261409304, - "sentence_nr": 2 + "score": 0.4720170373660879, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.25848476545940924, - "sentence_nr": 2 + "score": 0.35069370820755275, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.5525933856866961, - "sentence_nr": 2 + "score": 0.5735580981959628, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.13019082899297843, - "sentence_nr": 2 + "score": 0.3559980252477847, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.40512126305429846, - "sentence_nr": 2 + "score": 0.5455195296019626, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "model": "microsoft/phi-4", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.24071298960902482, - "sentence_nr": 2 + "score": 0.30567695364499425, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "model": "microsoft/phi-4", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.5438509851618877, - "sentence_nr": 2 + "score": 0.4994609495460993, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.2063890416514164, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.38567678850872256, - "sentence_nr": 2 + "score": 0.0014196479273140264, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.3295566054952435, - "sentence_nr": 2 + "score": 0.27579736884967815, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.5816133441895466, - "sentence_nr": 2 + "score": 0.44560520221877703, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.3574583793293068, - "sentence_nr": 2 + "score": 0.24828430598240606, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.5924115119819969, - "sentence_nr": 2 + "score": 0.5078550622606068, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.37994652561206577, - "sentence_nr": 2 + "score": 0.34545319957597864, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.6464467277069994, - "sentence_nr": 2 + "score": 0.5727052860304503, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.4206507730319955, - "sentence_nr": 2 + "score": 0.4256604038587669, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.678851303587664, - "sentence_nr": 2 + "score": 0.595254482532169, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.35367180741660353, - "sentence_nr": 2 + "score": 0.35948829980203323, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.6344846206551544, - "sentence_nr": 2 + "score": 0.5639910704472698, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.4383332081326208, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.606835521790659, + "sentence_nr": 0 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "es", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.3428955163829333, - "sentence_nr": 2 + "score": 0.4706737586086326, + "sentence_nr": 0 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "es", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.6023036718160529, - "sentence_nr": 2 + "score": 0.6417233649435136, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.3563758622144919, - "sentence_nr": 2 + "score": 0.37742688647401873, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.6037023613177924, - "sentence_nr": 2 + "score": 0.5674314405993244, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.2632018059331281, - "sentence_nr": 2 + "score": 0.30233255481828714, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.501302719796297, - "sentence_nr": 2 + "score": 0.5203155278990113, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.4538010737057216, - "sentence_nr": 2 + "score": 0.35770998339980664, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.6446834621229663, - "sentence_nr": 2 + "score": 0.581833855035677, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.20586736678432452, - "sentence_nr": 2 + "score": 0.3625286446151028, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.5693016623172978, - "sentence_nr": 2 + "score": 0.6148737881972042, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", + "model": "mistralai/mistral-saba", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.27075075499555246, - "sentence_nr": 2 + "score": 0.4012050186129501, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", + "model": "mistralai/mistral-saba", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.5374328610523021, - "sentence_nr": 2 + "score": 0.624957701947079, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.1455399826828606, - "sentence_nr": 2 + "score": 0.37050354724951784, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.4504825146558032, - "sentence_nr": 2 + "score": 0.5886100035095279, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.175866555062937, - "sentence_nr": 2 + "score": 0.4827832239649112, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.2758687846643748, - "sentence_nr": 2 + "score": 0.6550149284488518, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.2948978498692003, - "sentence_nr": 2 + "score": 0.49745618936962227, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.5529453973837751, - "sentence_nr": 2 + "score": 0.6671359503342716, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.22292726306270316, - "sentence_nr": 2 + "score": 0.4519603667438429, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.5653789747970112, - "sentence_nr": 2 + "score": 0.6009987666643928, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.09362261118571368, - "sentence_nr": 2 + "score": 0.5542271267412462, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.3452056942265759, - "sentence_nr": 2 + "score": 0.7275049499508799, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.26930937054323245, - "sentence_nr": 2 + "score": 0.4878448947255055, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.5410704185827219, - "sentence_nr": 2 + "score": 0.6426554193783609, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "model": "microsoft/phi-4", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.24634765861867908, - "sentence_nr": 2 + "score": 0.4345206442085496, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "model": "microsoft/phi-4", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.55968513851572, - "sentence_nr": 2 + "score": 0.6585506298716566, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.29037747307996287, - "sentence_nr": 2 + "score": 0.327910616954487, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.5917006930610393, - "sentence_nr": 2 + "score": 0.5832612672351287, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.2158914621804855, - "sentence_nr": 2 + "score": 0.5350666712285949, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.5448184155666022, - "sentence_nr": 2 + "score": 0.6501051146578934, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.4381699512774638, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.39152357647177133, - "sentence_nr": 2 + "score": 0.5887410281104106, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.4054234087021839, - "sentence_nr": 2 + "score": 0.38968867962607934, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.667304751638097, - "sentence_nr": 2 + "score": 0.5581403039390647, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.08175340974854195, - "sentence_nr": 2 + "score": 0.44378279372807367, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.4308342322390109, - "sentence_nr": 2 + "score": 0.5825113284353328, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.34093060419986554, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.3185785286756486, - "sentence_nr": 2 + "score": 0.5050414552947896, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.3268233487541633, - "sentence_nr": 2 + "score": 0.4312966248043606, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.6084114123608597, - "sentence_nr": 2 + "score": 0.5796681637399156, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.35491112745119674, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.04759937639788563, - "sentence_nr": 2 + "score": 0.5468570448895366, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.2500653935141143, - "sentence_nr": 2 + "score": 0.339818403012025, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.585528867886047, - "sentence_nr": 2 + "score": 0.5156759219303986, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.21403222128228389, - "sentence_nr": 2 + "score": 0.3521993875851369, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.563121432204311, - "sentence_nr": 2 + "score": 0.539388626519818, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.18917620656425485, - "sentence_nr": 2 + "score": 0.30451244884661904, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.4346170232980484, - "sentence_nr": 2 + "score": 0.5270886236273997, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.18505378795140082, - "sentence_nr": 2 + "score": 0.32155220285195785, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.47051087423292237, - "sentence_nr": 2 + "score": 0.5502401579986564, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "model": "mistralai/mistral-saba", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.09807167131529582, - "sentence_nr": 2 + "score": 0.4173085585041339, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "model": "mistralai/mistral-saba", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.4646043403137081, - "sentence_nr": 2 + "score": 0.5601628790215745, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", + "model": "mistralai/mistral-nemo", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.12049505059461789, - "sentence_nr": 2 + "score": 0.17519022150205726, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", + "model": "mistralai/mistral-nemo", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.49305039430054654, - "sentence_nr": 2 + "score": 0.3999732122899595, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.18031307339768174, - "sentence_nr": 2 + "score": 0.5730497086675282, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.522164454804456, - "sentence_nr": 2 + "score": 0.7127638643645163, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.037874984245935134, - "sentence_nr": 2 + "score": 0.3981933677617053, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.13083094614009624, - "sentence_nr": 2 + "score": 0.5407088037569207, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", + "model": "google/gemma-3-27b-it", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.30865045220428267, - "sentence_nr": 2 + "score": 0.4647350187265495, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", + "model": "google/gemma-3-27b-it", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.61517480898171, - "sentence_nr": 2 + "score": 0.6141693179612359, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.20064110494011925, - "sentence_nr": 2 + "score": 0.38621174932063007, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.5205761630334527, - "sentence_nr": 2 + "score": 0.5629586598269498, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", + "model": "deepseek/deepseek-chat", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.0684792839692368, - "sentence_nr": 2 + "score": 0.4173085585041339, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", + "model": "deepseek/deepseek-chat", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.3138898863773231, - "sentence_nr": 2 + "score": 0.572728665781863, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "model": "microsoft/phi-4", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.1573857459340795, - "sentence_nr": 2 + "score": 0.4031413391988539, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "model": "microsoft/phi-4", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.5347526444819753, - "sentence_nr": 2 + "score": 0.5857356072592188, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.008180069062416927, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.04605877529742035, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.13904320686250593, - "sentence_nr": 2 + "score": 0.431633629801714, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.47169365083525167, - "sentence_nr": 2 + "score": 0.5611890334237722, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.4054983797456263, - "sentence_nr": 2 + "score": 0.17727473966694943, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.6264774230839022, - "sentence_nr": 2 + "score": 0.3475071694578125, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.420450507904553, - "sentence_nr": 2 + "score": 0.20451416608402828, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.6503146347305717, - "sentence_nr": 2 + "score": 0.38185285396290036, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.43870712112271204, - "sentence_nr": 2 + "score": 0.24678030799496634, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.6525926696001584, - "sentence_nr": 2 + "score": 0.4463603005685723, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.4207445490015154, - "sentence_nr": 2 + "score": 0.17736142488062245, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.6496192656497308, - "sentence_nr": 2 + "score": 0.3654025502565916, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.20739970432549085, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.42437033953049846, + "sentence_nr": 0 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.36781689904382464, - "sentence_nr": 2 + "score": 0.10962812839170834, + "sentence_nr": 0 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.6454684777803729, - "sentence_nr": 2 + "score": 0.3474392467819317, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.2999092588227898, - "sentence_nr": 2 + "score": 0.12843096555088776, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.5505916495384416, - "sentence_nr": 2 + "score": 0.3356201430079791, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.2772655014585435, - "sentence_nr": 2 + "score": 0.09963289028859902, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.4799723286048352, - "sentence_nr": 2 + "score": 0.3282252226526078, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.005449161724399305, - "sentence_nr": 2 + "score": 0.1304767107567893, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.026158029267484995, - "sentence_nr": 2 + "score": 0.31187491537917433, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.2516768028374535, - "sentence_nr": 2 + "score": 0.14222939605129875, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.49572209766846287, - "sentence_nr": 2 + "score": 0.32212719342865237, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", + "model": "mistralai/mistral-saba", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.2958351954606211, - "sentence_nr": 2 + "score": 0.2530426809357554, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", + "model": "mistralai/mistral-saba", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.5202221091638364, - "sentence_nr": 2 + "score": 0.4243735004657182, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.3471036105446511, - "sentence_nr": 2 + "score": 0.23231698016646643, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.5663019495273462, - "sentence_nr": 2 + "score": 0.3975460179474858, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.009070964338765818, - "sentence_nr": 2 + "score": 0.31347763183631233, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.06852404470758497, - "sentence_nr": 2 + "score": 0.5290306159372288, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.3212066202235163, - "sentence_nr": 2 + "score": 0.21424220062855565, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.5836558214123343, - "sentence_nr": 2 + "score": 0.419670209137084, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.20051119758906127, - "sentence_nr": 2 + "score": 0.24809323900653618, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.5334791309401924, - "sentence_nr": 2 + "score": 0.455254011012136, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.24894072982768842, - "sentence_nr": 2 + "score": 0.22798424876104878, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.5212235893093335, - "sentence_nr": 2 + "score": 0.4315883077530936, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.23724642034775328, - "sentence_nr": 2 + "score": 0.19800382589668103, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.5175129869169551, - "sentence_nr": 2 + "score": 0.39444773256270804, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "model": "microsoft/phi-4", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.1849419409628554, - "sentence_nr": 2 + "score": 0.18542575245571596, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "model": "microsoft/phi-4", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.5067677916637257, - "sentence_nr": 2 + "score": 0.3788879537776753, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.49804532928450235, - "sentence_nr": 2 + "score": 0.0013506212857914643, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.22750547588410633, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.4263684749347053, - "sentence_nr": 2 + "score": 0.4160796302144522, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.5494025263062274, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.4228574070038002, - "sentence_nr": 2 + "score": 0.6860161543947312, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.39464291294535175, - "sentence_nr": 2 + "score": 0.2636405082687104, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.6481030286156323, - "sentence_nr": 2 + "score": 0.5072009470421238, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.11126509848873964, - "sentence_nr": 2 + "score": 0.3994138413590059, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.4338923576538663, - "sentence_nr": 2 + "score": 0.6335699196668345, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.32440820201863096, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.31311320826536454, - "sentence_nr": 2 + "score": 0.5816024759666973, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.17236491061326006, - "sentence_nr": 2 + "score": 0.34337934672029974, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.5548663878579595, - "sentence_nr": 2 + "score": 0.5312561179539695, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.4403035618887612, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.3540506408782035, - "sentence_nr": 2 + "score": 0.6631416039442045, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.08906092883748383, - "sentence_nr": 2 + "score": 0.3536676112393946, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.4317746285352776, - "sentence_nr": 2 + "score": 0.5252283198216768, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.3535002370419364, - "sentence_nr": 2 + "score": 0.19029342592119838, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.5959879218348465, - "sentence_nr": 2 + "score": 0.46626570553905117, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.393613605227227, - "sentence_nr": 2 + "score": 0.19029342592119838, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.6492198447661237, - "sentence_nr": 2 + "score": 0.4672523665085142, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.393613605227227, - "sentence_nr": 2 + "score": 0.30860365223174097, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.6492198447661237, - "sentence_nr": 2 + "score": 0.5323991480984563, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-saba", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.38333108639273095, - "sentence_nr": 2 + "score": 0.41255466997762913, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-saba", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.6252821653079126, - "sentence_nr": 2 + "score": 0.6386796139644701, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.30783677787322206, - "sentence_nr": 2 + "score": 0.27779829164283953, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.6417108947268295, - "sentence_nr": 2 + "score": 0.5451746888418362, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.2562849004088193, - "sentence_nr": 2 + "score": 0.5747867448954953, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.5767019342009202, - "sentence_nr": 2 + "score": 0.7236969475311006, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.19851743023355672, - "sentence_nr": 2 + "score": 0.5046818724935341, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.49793621556542356, - "sentence_nr": 2 + "score": 0.6768618695861154, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.44464935391849836, - "sentence_nr": 2 + "score": 0.4698824517223119, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.696223947326307, - "sentence_nr": 2 + "score": 0.6509974368827985, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.10954782904363085, - "sentence_nr": 2 + "score": 0.465943811426769, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.5090382887002297, - "sentence_nr": 2 + "score": 0.646929348297808, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.1614809742656655, - "sentence_nr": 2 + "score": 0.47785728909212377, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.4145218112165384, - "sentence_nr": 2 + "score": 0.688922443358261, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "model": "microsoft/phi-4", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.2834484329788497, - "sentence_nr": 2 + "score": 0.34423833594189435, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "model": "microsoft/phi-4", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.5201572704778937, - "sentence_nr": 2 + "score": 0.576428944148875, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.006569332862878646, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.060864196135666904, - "sentence_nr": 2 + "score": 0.001973164956590371, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.2756885721075884, - "sentence_nr": 2 + "score": 0.39626726411474644, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.5867077870431389, - "sentence_nr": 2 + "score": 0.5928013371853409, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.35983766090218355, - "sentence_nr": 2 + "score": 0.4222656487192343, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.5862251404739759, - "sentence_nr": 2 + "score": 0.6421614792137705, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.21147734744561483, - "sentence_nr": 2 + "score": 0.20174173621464261, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.41020178654369294, - "sentence_nr": 2 + "score": 0.5179166118048267, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.3563982585943877, - "sentence_nr": 2 + "score": 0.5116634146141776, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.5378970484635915, - "sentence_nr": 2 + "score": 0.6950231685488834, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.1510722413165652, - "sentence_nr": 2 + "score": 0.27720246067551324, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.43592329727028295, - "sentence_nr": 2 + "score": 0.543353961410956, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.4509728065040443, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.6500555759069969, + "sentence_nr": 0 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.2162882016096312, - "sentence_nr": 2 + "score": 0.2481045172535937, + "sentence_nr": 0 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.5347607537943324, - "sentence_nr": 2 + "score": 0.5296542200314776, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.2465888500427759, - "sentence_nr": 2 + "score": 0.2348553453946444, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.5221084445696768, - "sentence_nr": 2 + "score": 0.4891959123914518, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.13240628161243978, - "sentence_nr": 2 + "score": 0.18939338506411268, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.3347576434758551, - "sentence_nr": 2 + "score": 0.49051459363589717, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.37081839104772296, - "sentence_nr": 2 + "score": 0.07066046637883687, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.6563960892873841, - "sentence_nr": 2 + "score": 0.3470742306712029, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.31372333533981844, - "sentence_nr": 2 + "score": 0.1658317981046275, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.5741396495481692, - "sentence_nr": 2 + "score": 0.4396479745504188, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", + "model": "mistralai/mistral-saba", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.18943569604789887, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", + "model": "mistralai/mistral-saba", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.45232508607731536, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", + "model": "mistralai/mistral-nemo", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.4583603882613907, - "sentence_nr": 2 + "score": 0.0051795097123194895, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", + "model": "mistralai/mistral-nemo", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.671355324267905, - "sentence_nr": 2 + "score": 0.0769184040221202, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.10077062063331403, - "sentence_nr": 2 + "score": 0.5030879352944162, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.09760482860544632, - "sentence_nr": 2 + "score": 0.6846791297914651, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.2600884210903425, - "sentence_nr": 2 + "score": 0.5055953275836201, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.531430106996609, - "sentence_nr": 2 + "score": 0.6788641233135904, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.2971752224486841, - "sentence_nr": 2 + "score": 0.42072143291659103, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.605133664481872, - "sentence_nr": 2 + "score": 0.6265956117333142, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.2329856851831642, - "sentence_nr": 2 + "score": 0.5079711118438801, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.5405751250637106, - "sentence_nr": 2 + "score": 0.6900890595896133, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", + "model": "deepseek/deepseek-chat", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.2563564295134795, - "sentence_nr": 2 + "score": 0.44456824349458923, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", + "model": "deepseek/deepseek-chat", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.5499025328773104, - "sentence_nr": 2 + "score": 0.6567673471796899, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", + "model": "microsoft/phi-4", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.2500653935141143, - "sentence_nr": 2 + "score": 0.1258024774154196, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", + "model": "microsoft/phi-4", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.5098952451698188, - "sentence_nr": 2 + "score": 0.39536132018550907, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.2537710754125116, - "sentence_nr": 2 + "score": 0.07446712399912313, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.5898314098579354, - "sentence_nr": 2 + "score": 0.30485244492635144, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", + "model": "amazon/nova-micro-v1", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.22150370805587954, - "sentence_nr": 2 + "score": 0.3394216003840941, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", + "model": "amazon/nova-micro-v1", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.5463488388082953, - "sentence_nr": 2 + "score": 0.5399361239256494, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.14574402656519908, - "sentence_nr": 2 + "score": 0.353179331599201, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.4275543759804943, - "sentence_nr": 2 + "score": 0.5558881348090785, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.3319944964021059, - "sentence_nr": 2 + "score": 0.33522833358360765, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.6676204564640195, - "sentence_nr": 2 + "score": 0.534195929930943, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.24750028117795922, - "sentence_nr": 2 + "score": 0.3279338213872338, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.5894646098566614, - "sentence_nr": 2 + "score": 0.5206722319482356, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.1900249500296748, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.4570054063295732, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.2687379663485886, - "sentence_nr": 2 + "score": 0.3417785003224633, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.5826619907747026, - "sentence_nr": 2 + "score": 0.5213383973963687, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.15897333608001968, - "sentence_nr": 2 + "score": 0.33082955843497286, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.4496168003395693, - "sentence_nr": 2 + "score": 0.5333110333825679, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.15089318423122544, - "sentence_nr": 2 + "score": 0.3461114139111442, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.4042478943311393, - "sentence_nr": 2 + "score": 0.5537111972654953, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.2786312783602775, - "sentence_nr": 2 + "score": 0.2753998085739322, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.4836796407825139, - "sentence_nr": 2 + "score": 0.5086546717198241, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.41756686236967944, - "sentence_nr": 2 + "score": 0.29736692860269925, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.5616829345739638, - "sentence_nr": 2 + "score": 0.5307507954839085, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.4205004825822372, - "sentence_nr": 2 + "score": 0.389868366744335, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.5674537639314233, - "sentence_nr": 2 + "score": 0.6181891240638018, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", + "model": "mistralai/mistral-saba", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.40974323819644953, - "sentence_nr": 2 + "score": 0.3666868345821895, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", + "model": "mistralai/mistral-saba", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.5368112087257564, - "sentence_nr": 2 + "score": 0.5683418005578137, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.31664648301486426, - "sentence_nr": 2 + "score": 0.1725434802959889, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.5144731747994161, - "sentence_nr": 2 + "score": 0.4679048179971906, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.39461811323775403, - "sentence_nr": 2 + "score": 0.5181493032572185, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.5655204109921267, - "sentence_nr": 2 + "score": 0.6641637236851636, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.219672574669477, - "sentence_nr": 2 + "score": 0.3559980252477847, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.37413906702142435, - "sentence_nr": 2 + "score": 0.5099636935342754, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.005449161724399305, - "sentence_nr": 2 + "score": 0.37858398735109683, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.026158029267484995, - "sentence_nr": 2 + "score": 0.5535757116038853, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.3116520879159789, - "sentence_nr": 2 + "score": 0.3201978307646018, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.47517792402030584, - "sentence_nr": 2 + "score": 0.5281184078781382, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.41470071559182964, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.6136976280042111, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", + "model": "microsoft/phi-4", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.3627923367798331, - "sentence_nr": 2 + "score": 0.020243598198744116, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", + "model": "microsoft/phi-4", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.5255399246733422, - "sentence_nr": 2 + "score": 0.22949385525084642, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.12870376210497989, - "sentence_nr": 2 + "score": 0.1627842130495941, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.22151451171035633, - "sentence_nr": 2 + "score": 0.3513085830979839, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "sw", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.30181468526956173, - "sentence_nr": 2 + "score": 0.30576442771176066, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "sw", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.5261802780475523, - "sentence_nr": 2 + "score": 0.5175418593642837, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.39579112101105834, - "sentence_nr": 2 + "score": 0.4397415106513502, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.6431490866428237, - "sentence_nr": 2 + "score": 0.5907735810868658, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.38189567401226293, - "sentence_nr": 2 + "score": 0.31308824228412185, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.6154314825900052, - "sentence_nr": 2 + "score": 0.4950165423717857, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.3436153961225413, - "sentence_nr": 2 + "score": 0.28646584019908145, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.5954254642696512, - "sentence_nr": 2 + "score": 0.4746124656486252, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.4339219137216798, - "sentence_nr": 2 + "score": 0.21240535233702176, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.6338401824373191, - "sentence_nr": 2 + "score": 0.3985917300395283, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "id", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.3842999367495742, - "sentence_nr": 2 + "score": 0.4261122398801008, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "id", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.5816504483384909, - "sentence_nr": 2 + "score": 0.5780599654568969, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.25564177137418986, - "sentence_nr": 2 + "score": 0.24013077509836228, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.49870011615602194, - "sentence_nr": 2 + "score": 0.45691689611827113, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.27946415227589155, - "sentence_nr": 2 + "score": 0.3734491516745214, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.4938296655037709, - "sentence_nr": 2 + "score": 0.5499493819792871, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.546590882357469, - "sentence_nr": 2 + "score": 0.11197425295329791, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.7387035187469114, - "sentence_nr": 2 + "score": 0.34192899545806504, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.26925601229087914, - "sentence_nr": 2 + "score": 0.08586319437724399, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.6050395148484196, - "sentence_nr": 2 + "score": 0.28186139496120405, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.27336087678628246, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.4919779927233182, - "sentence_nr": 2 + "score": 0.18416226663843008, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", + "model": "mistralai/mistral-saba", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.43235877156651625, - "sentence_nr": 2 + "score": 0.02935045498987271, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", + "model": "mistralai/mistral-saba", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.614485867381761, - "sentence_nr": 2 + "score": 0.18487351069843888, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.012951112459987979, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.11882277038397698, - "sentence_nr": 2 + "score": 0.16506895155186319, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.3272963527043486, - "sentence_nr": 2 + "score": 0.44783369619727237, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.5715613564297359, - "sentence_nr": 2 + "score": 0.6267235013545246, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.35253338922743144, - "sentence_nr": 2 + "score": 0.31385387863650493, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.6487975154557831, - "sentence_nr": 2 + "score": 0.4830034520070859, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.2126707920684064, - "sentence_nr": 2 + "score": 0.030041173262958625, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.4659908460634765, - "sentence_nr": 2 + "score": 0.16461275738712375, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.2914880531303981, - "sentence_nr": 2 + "score": 0.28710736118585223, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.5962886968213414, - "sentence_nr": 2 + "score": 0.47019373110040275, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.23944666570758283, - "sentence_nr": 2 + "score": 0.324978583787703, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.5106509239874657, - "sentence_nr": 2 + "score": 0.5176680074900262, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "de", + "model": "microsoft/phi-4", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.2258043389079604, - "sentence_nr": 2 + "score": 0.1527358476261763, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "de", + "model": "microsoft/phi-4", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.528195718512124, - "sentence_nr": 2 + "score": 0.37130823751862757, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.15985840708020788, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.44951053332729884, - "sentence_nr": 2 + "score": 0.02574148824389794, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.17920531400657588, - "sentence_nr": 2 + "score": 0.2111955699760469, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.4522763055702811, - "sentence_nr": 2 + "score": 0.428831679677381, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.005449161724399305, - "sentence_nr": 2 + "score": 0.46890796443667687, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.026158029267484995, - "sentence_nr": 2 + "score": 0.6445795993451092, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.26199400535088346, - "sentence_nr": 2 + "score": 0.30704694388456133, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.5804827870380099, - "sentence_nr": 2 + "score": 0.5217468869740803, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.13442725522288548, - "sentence_nr": 2 + "score": 0.3675770737978913, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.39067504005337655, - "sentence_nr": 2 + "score": 0.5833872029429698, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.20533250289138671, - "sentence_nr": 2 + "score": 0.2381080412543041, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.49681810344665644, - "sentence_nr": 2 + "score": 0.4678770958208047, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.0950330051810703, - "sentence_nr": 2 + "score": 0.4648698750879596, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.2535554509913635, - "sentence_nr": 2 + "score": 0.6377045673268608, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.20378172261136207, - "sentence_nr": 2 + "score": 0.3558905194860598, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.48600008237332104, - "sentence_nr": 2 + "score": 0.5570369132115492, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.24513414885202045, - "sentence_nr": 2 + "score": 0.3612717557348476, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.5476647609559218, - "sentence_nr": 2 + "score": 0.5558371668340614, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.23240102389974368, - "sentence_nr": 2 + "score": 0.1619935148676164, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.4973274282641141, - "sentence_nr": 2 + "score": 0.4271616542112861, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.2516768028374535, - "sentence_nr": 2 + "score": 0.15992948177167177, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.47249781871556595, - "sentence_nr": 2 + "score": 0.42371338929019253, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.16105265992626083, - "sentence_nr": 2 + "score": 0.12508074021419405, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.404377371664668, - "sentence_nr": 2 + "score": 0.39088781423976093, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", + "model": "mistralai/mistral-saba", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.2573392925035755, - "sentence_nr": 2 + "score": 0.2501291156194984, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", + "model": "mistralai/mistral-saba", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.5328565784271402, - "sentence_nr": 2 + "score": 0.4242027531340573, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.27217589854489177, - "sentence_nr": 2 + "score": 0.10727279557648407, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.5756343666825848, - "sentence_nr": 2 + "score": 0.33374335628033724, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.15813859795767055, - "sentence_nr": 2 + "score": 0.5488130346879996, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.44607340294350173, - "sentence_nr": 2 + "score": 0.720419730625973, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.005449161724399305, - "sentence_nr": 2 + "score": 0.4269590215068612, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.026158029267484995, - "sentence_nr": 2 + "score": 0.6090516838507328, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.20533250289138671, - "sentence_nr": 2 + "score": 0.3699375619378516, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.45975635079501215, - "sentence_nr": 2 + "score": 0.5932397042974766, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.0698714799763323, - "sentence_nr": 2 + "score": 0.4346391355101555, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.18217918401705574, - "sentence_nr": 2 + "score": 0.648783727613815, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.3077422016953529, - "sentence_nr": 2 + "score": 0.3994868082159731, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.5943673820353285, - "sentence_nr": 2 + "score": 0.6159441456463444, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.0013854253255749516, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.09564571510780719, - "sentence_nr": 2 + "score": 0.24647819790998704, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.1907009110214351, - "sentence_nr": 2 + "score": 0.4673628408395248, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.10353153556093725, - "sentence_nr": 2 + "score": 0.35045670972629744, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.40215410362634535, - "sentence_nr": 2 + "score": 0.564796827816794, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.7281051247089317, - "sentence_nr": 3 + "score": 0.07407154448063642, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.7882997401328445, - "sentence_nr": 3 + "score": 0.43145434527321425, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.5806197937310393, - "sentence_nr": 3 + "score": 0.12903696060775005, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.7346706700987636, - "sentence_nr": 3 + "score": 0.456225988032654, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.5793367580502561, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.6502428441722727, - "sentence_nr": 3 + "score": 0.024459391267874976, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.4855332614117322, - "sentence_nr": 3 + "score": 0.12351824822447692, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.5299556742893647, - "sentence_nr": 3 + "score": 0.46822754470803873, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.16221060929249448, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.5093553101634974, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.369345079296433, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.4718547623527638, - "sentence_nr": 3 + "score": 0.4463892736716987, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.38249626297768063, - "sentence_nr": 3 + "score": 0.15815751066481462, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.40976234193505356, - "sentence_nr": 3 + "score": 0.5152611872266766, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.14461769822288426, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.4699313689038472, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.14461769822288426, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.4699313689038472, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.369345079296433, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.5103516764863386, - "sentence_nr": 3 + "score": 0.4031456247133876, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.15865962437786532, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.5308758516723786, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.08516700886866406, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.4091252890943268, + "sentence_nr": 1 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.4646232199104102, - "sentence_nr": 3 + "score": 0.1583163321869008, + "sentence_nr": 1 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.5678926447384061, - "sentence_nr": 3 + "score": 0.5032511267062394, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.5357110024227318, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.6365941772753647, - "sentence_nr": 3 + "score": 0.4370196290761142, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.20669086265781264, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.5076721272198604, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.6960917409740967, - "sentence_nr": 3 + "score": 0.15611634095633747, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.8209757784637755, - "sentence_nr": 3 + "score": 0.5075814499747183, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.11552841238377509, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.41545719136805614, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.14679153443080498, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.43723267444847275, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.14790264259417688, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.27159767590045303, - "sentence_nr": 3 + "score": 0.4122750002638689, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.4751132438608344, - "sentence_nr": 3 + "score": 0.15412719160788987, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.6849386986272349, - "sentence_nr": 3 + "score": 0.5010353699512481, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.25552199116069907, - "sentence_nr": 3 + "score": 0.06647168102389285, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.3799133205289109, - "sentence_nr": 3 + "score": 0.34350832619898364, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.23386786214190372, - "sentence_nr": 3 + "score": 0.12560672881768975, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.3682311523733465, - "sentence_nr": 3 + "score": 0.4969560260291519, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.11739521786077453, - "sentence_nr": 3 + "score": 0.17077058518804336, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.22090491782919655, - "sentence_nr": 3 + "score": 0.5022008374701596, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.1892240568795935, - "sentence_nr": 3 + "score": 0.10784756064735967, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.280413108453108, - "sentence_nr": 3 + "score": 0.4427230465401631, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.079733958307467, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.44184113874318065, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.13857814312261593, - "sentence_nr": 3 + "score": 0.05534265631745826, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.26136391784313634, - "sentence_nr": 3 + "score": 0.39688946206212833, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.08635800047213174, - "sentence_nr": 3 + "score": 0.12369892692249995, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.218109371254876, - "sentence_nr": 3 + "score": 0.44549610902403686, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.14876360706338185, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.48334700075785475, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.14816972851445565, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.4827506956539706, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.139800134566647, - "sentence_nr": 3 + "score": 0.06656213940646748, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.2510112235832054, - "sentence_nr": 3 + "score": 0.38435741328258305, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.0919956586924294, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.4320605493117323, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.42723260976616784, + "sentence_nr": 1 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.1767874865365185, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.31807700660641347, - "sentence_nr": 3 + "score": 0.42954139521687473, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.3532931581623198, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.0925329498915617, - "sentence_nr": 3 + "score": 0.175396614619324, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.2110486160692096, - "sentence_nr": 3 + "score": 0.49736499605529066, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.1609675245202845, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", + "score": 0.5069863833094232, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.39861856844560895, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.2887308472548599, - "sentence_nr": 3 + "score": 0.10802866550123454, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "microsoft/phi-4", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.41654484827391225, - "sentence_nr": 3 + "score": 0.4137246391706996, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -18033,207 +20174,319 @@ "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.12453389344594705, - "sentence_nr": 3 + "score": 0.4041678259311437, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.141543757252386, - "sentence_nr": 3 + "score": 0.1290514243115152, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.2594145364221844, - "sentence_nr": 3 + "score": 0.4766581477336301, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.4093301993048525, - "sentence_nr": 3 + "score": 0.09735981717515908, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.512762518189388, - "sentence_nr": 3 + "score": 0.35288934658906385, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.6244631487487835, - "sentence_nr": 3 + "score": 0.08273178236238297, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.6931369519059803, - "sentence_nr": 3 + "score": 0.36399666460809255, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.581972638479957, - "sentence_nr": 3 + "score": 0.13012870333257068, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.6970914528585833, - "sentence_nr": 3 + "score": 0.3852835519852091, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.44120063733294235, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.5296624608564717, - "sentence_nr": 3 + "score": 0.3356633416447032, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.09656914574218514, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.38029971714040783, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.4272870063962341, - "sentence_nr": 3 + "score": 0.09553543457519309, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.5425603129070803, - "sentence_nr": 3 + "score": 0.3531525294256142, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.5642761727828352, - "sentence_nr": 3 + "score": 0.12422788549118892, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.6181373706707737, - "sentence_nr": 3 + "score": 0.40222210564426, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.12020040946693893, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.42011426633442717, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.12020040946693893, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.42011426633442717, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.4440750605884706, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.5402588602256685, - "sentence_nr": 3 + "score": 0.28789057461471257, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.11453355784159679, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3913231805957233, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.058502903451539655, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.31167095759390945, + "sentence_nr": 1 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.6242817472465665, - "sentence_nr": 3 + "score": 0.1568418931847707, + "sentence_nr": 1 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.6834390596430621, - "sentence_nr": 3 + "score": 0.41485110412488607, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.6458552885189878, - "sentence_nr": 3 + "score": 0.08628172140775388, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.7468283944111381, - "sentence_nr": 3 + "score": 0.40272097867515005, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.4272870063962341, - "sentence_nr": 3 + "score": 0.10467757347424328, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.5170917334956868, - "sentence_nr": 3 + "score": 0.36749853206282146, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.43310177167002284, - "sentence_nr": 3 + "score": 0.13026649757585426, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.534533410927948, - "sentence_nr": 3 + "score": 0.41550755035304077, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.07102549926281305, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.36283847689753107, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.07771118177065015, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.34810630778111895, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -18241,623 +20494,959 @@ "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.26481979271706185, - "sentence_nr": 3 + "score": 0.039782861678265974, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.3212854967972961, - "sentence_nr": 3 + "score": 0.1175904695048123, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.47171327621770304, - "sentence_nr": 3 + "score": 0.3996881234028031, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.4577275269488853, - "sentence_nr": 3 + "score": 0.11564012893219777, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.6747054474171109, - "sentence_nr": 3 + "score": 0.44599783682350064, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.25383339228798274, - "sentence_nr": 3 + "score": 0.12601482779921785, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.45896379476820603, - "sentence_nr": 3 + "score": 0.43595665254608706, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.3508739523842563, - "sentence_nr": 3 + "score": 0.12022286401047096, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.5533976153694653, - "sentence_nr": 3 + "score": 0.48279986805368713, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.23705266435224473, - "sentence_nr": 3 + "score": 0.15350377490367967, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.44716007458096513, - "sentence_nr": 3 + "score": 0.47645148444499064, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.4329131730483987, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.27046570133003095, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.3736509638751225, - "sentence_nr": 3 + "score": 0.4042977714165801, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.2615858282579583, - "sentence_nr": 3 + "score": 0.13714845589364738, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.35447530946908884, - "sentence_nr": 3 + "score": 0.45499281593451946, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.15187655550578968, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.4991359434734445, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.15187655550578968, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.4991359434734445, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.25530635525095574, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.4224404198283467, - "sentence_nr": 3 + "score": 0.38785209659947417, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.07880540498630446, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.43116404282677934, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.08174633970003824, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.4119415264378122, + "sentence_nr": 1 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.30147856626075187, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.4883780556286986, - "sentence_nr": 3 + "score": 0.43122763125947844, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.44392090655418587, - "sentence_nr": 3 + "score": 0.07593921750963317, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.5678926447384061, - "sentence_nr": 3 + "score": 0.459811901287104, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.12848168928706002, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.4421263683867116, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.40891568776497583, - "sentence_nr": 3 + "score": 0.09198045184317984, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.46522329223142805, - "sentence_nr": 3 + "score": 0.4598393646838097, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.13191941029086238, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.4054693592509284, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.10246963414578486, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.40167806467929934, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.11436433361427001, - "sentence_nr": 3 + "score": 0.07798530247118374, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.23221971735799607, - "sentence_nr": 3 + "score": 0.4006113700211268, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.18580985894574314, - "sentence_nr": 3 + "score": 0.14541923959059266, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.3347249292100999, - "sentence_nr": 3 + "score": 0.47577612932999147, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.27668736912821895, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.4414406760568898, - "sentence_nr": 3 + "score": 0.2521233582161207, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.17200767571780612, - "sentence_nr": 3 + "score": 0.40959087443621306, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.3723150838362789, - "sentence_nr": 3 + "score": 0.6348509381122925, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.2465659486053858, - "sentence_nr": 3 + "score": 0.40959087443621306, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.5689069160047179, - "sentence_nr": 3 + "score": 0.6348509381122925, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.3182970443542658, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.30391153783979835, - "sentence_nr": 3 + "score": 0.5953162569846108, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.21544268192394778, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.5620944560874692, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.12887696534828325, - "sentence_nr": 3 + "score": 0.22238763863547817, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.30114368429557287, - "sentence_nr": 3 + "score": 0.5765887803460186, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.15138514598766048, - "sentence_nr": 3 + "score": 0.3026566818840519, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.3237497764315872, - "sentence_nr": 3 + "score": 0.5945859352092411, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.3939307348079401, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.620481013358287, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.3939307348079401, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.620481013358287, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.1544458227548897, - "sentence_nr": 3 + "score": 0.18816868192268246, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.3343587266874694, - "sentence_nr": 3 + "score": 0.5179253053631742, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.40959087443621306, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.6348509381122925, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.3433850255029819, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.6024262111925348, + "sentence_nr": 1 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.43310177167002284, - "sentence_nr": 3 + "score": 0.23522101642407195, + "sentence_nr": 1 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.6107152353968289, - "sentence_nr": 3 + "score": 0.5302138314227511, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.23817261442630488, - "sentence_nr": 3 + "score": 0.10416245702438577, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.448286611717823, - "sentence_nr": 3 + "score": 0.464679503899398, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.09629060614977814, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.43565498999747165, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.20679845323803403, - "sentence_nr": 3 + "score": 0.28418123342684043, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.47636494608150104, - "sentence_nr": 3 + "score": 0.539816402671069, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.23664926064855632, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.5547657702782856, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.18721276943014145, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.5089836023745591, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.3765959322920135, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.6295826606382191, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.09147827112247602, - "sentence_nr": 3 + "score": 0.40801269202545287, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.3258762519783793, - "sentence_nr": 3 + "score": 0.6210533025653295, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.4751132438608344, - "sentence_nr": 3 + "score": 0.0867932999243575, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.6159319815107203, - "sentence_nr": 3 + "score": 0.4201964133235075, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.1477219991186121, - "sentence_nr": 3 + "score": 0.08214106568089705, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.28685201698226354, - "sentence_nr": 3 + "score": 0.3969463877642616, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.1477219991186121, - "sentence_nr": 3 + "score": 0.1897299381066278, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.2391308148553106, - "sentence_nr": 3 + "score": 0.5086851537953713, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.18180608220159192, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.27307753334479423, - "sentence_nr": 3 + "score": 0.3833939462124923, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.14965975078050625, - "sentence_nr": 3 + "score": 0.08296825794514656, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.22213502776474325, - "sentence_nr": 3 + "score": 0.38492923596577555, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.15604242268653643, - "sentence_nr": 3 + "score": 0.06036796843527163, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.37581426760977427, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.07793031063789554, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.2255928425212252, - "sentence_nr": 3 + "score": 0.3700181221537743, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.10204728251899628, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.32811536740958436, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.1072035287734848, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.38653021859049697, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.14965975078050625, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.22213502776474325, - "sentence_nr": 3 + "score": 0.2718653389257641, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.13784847984621976, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.38056078135358473, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.06831564014494437, + "sentence_nr": 1 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.40842567407749947, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.49897421349092935, - "sentence_nr": 3 + "score": 0.4551974335055929, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.3160946016179871, - "sentence_nr": 3 + "score": 0.09866644266774938, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.407876439044591, - "sentence_nr": 3 + "score": 0.4285845620274377, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.025108530586642898, - "sentence_nr": 3 + "score": 0.3327209336079636, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.17466240109087192, - "sentence_nr": 3 + "score": 0.1777835117834348, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.2719194508460068, - "sentence_nr": 3 + "score": 0.5166806073547074, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.4056741942217607, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.3500358865894883, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -18865,207 +21454,319 @@ "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.12316365460790003, - "sentence_nr": 3 + "score": 0.009396473650937872, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.2615858282579583, - "sentence_nr": 3 + "score": 0.13582906387565688, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.35862918415512257, - "sentence_nr": 3 + "score": 0.43344913217266734, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.7281051247089317, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.78479833664205, - "sentence_nr": 3 + "score": 0.3630576975795868, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.3254455687469726, - "sentence_nr": 3 + "score": 0.0744904632040495, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.4474512036484817, - "sentence_nr": 3 + "score": 0.4111163205685468, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.6230832293767097, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.702540870003671, - "sentence_nr": 3 + "score": 0.4363130300030932, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.3267294026204632, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.4510525482602028, - "sentence_nr": 3 + "score": 0.41747276065817185, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4278386148807753, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.21690365808279138, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.33065826652002533, - "sentence_nr": 3 + "score": 0.401865675252717, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.24728515687112834, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.3088155734423375, - "sentence_nr": 3 + "score": 0.41649654108052436, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.42978885796805627, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.42978885796805627, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.07749370908741021, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.27718461611705486, - "sentence_nr": 3 + "score": 0.3853293582383978, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4303918965149948, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.36273754948571285, + "sentence_nr": 1 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.6230832293767097, - "sentence_nr": 3 + "score": 0.08025555322499443, + "sentence_nr": 1 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.702540870003671, - "sentence_nr": 3 + "score": 0.40116594181297777, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.43416784832299543, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.5365920629514802, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.6274039030337838, - "sentence_nr": 3 + "score": 0.43622390508229153, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", + "score": 0.4250905063113662, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3674996461080071, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.4578226095312774, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "microsoft/phi-4", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.5406295999835291, - "sentence_nr": 3 + "score": 0.39174006736497724, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -19073,495 +21774,719 @@ "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.002054231717337716, - "sentence_nr": 3 + "score": 0.36347800793516216, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.12286996020967837, - "sentence_nr": 3 + "score": 0.06254678076846341, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.31567668741706395, - "sentence_nr": 3 + "score": 0.3887428577633272, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.4815092081725061, - "sentence_nr": 3 + "score": 0.08616711094288851, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.5820265218174012, - "sentence_nr": 3 + "score": 0.3696512763473903, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.23887527917609022, - "sentence_nr": 3 + "score": 0.12894104034845807, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.4120359948636439, - "sentence_nr": 3 + "score": 0.4486368934849452, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.36210097004176117, - "sentence_nr": 3 + "score": 0.08825252192863794, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.408098151133905, - "sentence_nr": 3 + "score": 0.4377853721520782, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.3165014630070639, - "sentence_nr": 3 + "score": 0.05345137572833361, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.42516173623967946, - "sentence_nr": 3 + "score": 0.3829169125379508, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.4024696872829392, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.3685289119518548, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.47062358557598893, - "sentence_nr": 3 + "score": 0.3404791678264965, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.353203510510529, - "sentence_nr": 3 + "score": 0.08767210132815903, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.4910213297498164, - "sentence_nr": 3 + "score": 0.40476518002703893, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.16673716541498604, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.45694863486516263, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.13578223194911257, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.4091221496995749, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.15820362165931962, - "sentence_nr": 3 + "score": 0.05422898988559086, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.2249046365436241, - "sentence_nr": 3 + "score": 0.335890201952113, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.06482491239183016, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3859593133534678, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3916659692165871, + "sentence_nr": 1 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.4185938787651429, - "sentence_nr": 3 + "score": 0.08465714266003518, + "sentence_nr": 1 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.6016367461945803, - "sentence_nr": 3 + "score": 0.41240280272579144, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.4753167451887016, - "sentence_nr": 3 + "score": 0.074350530956551, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.6372909532389948, - "sentence_nr": 3 + "score": 0.4346408669855704, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.34617921188455225, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.38317923930200504, - "sentence_nr": 3 + "score": 0.10186730973904586, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.47975624978837655, - "sentence_nr": 3 + "score": 0.43665642120840553, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.06552322707010354, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3576301367090791, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3731922614950606, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.08248974616169381, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.2054194471318506, - "sentence_nr": 3 + "score": 0.40456777770242314, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.25678404806291744, - "sentence_nr": 3 + "score": 0.10496714075880566, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.37045149029437513, - "sentence_nr": 3 + "score": 0.4262440114275301, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.580451128369423, - "sentence_nr": 3 + "score": 0.15386029327005746, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.7246473808162345, - "sentence_nr": 3 + "score": 0.43911482594829104, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.580451128369423, - "sentence_nr": 3 + "score": 0.10070927557742705, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.728208634600343, - "sentence_nr": 3 + "score": 0.43718220262892105, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.5793367580502561, - "sentence_nr": 3 + "score": 0.11478463129234825, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.6502428441722727, - "sentence_nr": 3 + "score": 0.4651957501593415, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.1243018504102695, - "sentence_nr": 3 + "score": 0.07137101582673294, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.32950116238735283, - "sentence_nr": 3 + "score": 0.4075406301092705, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.0846581996011045, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.45098657687728133, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.20664181816537014, - "sentence_nr": 3 + "score": 0.09107675218561961, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.371359687688326, - "sentence_nr": 3 + "score": 0.4297036775694859, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.23660362391696813, - "sentence_nr": 3 + "score": 0.14738500064905094, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.34152697838249696, - "sentence_nr": 3 + "score": 0.4659728395318289, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.16934590059353366, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.48466724462095995, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.16934590059353366, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.48466724462095995, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.369345079296433, - "sentence_nr": 3 + "score": 0.0643329477522681, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.5103516764863386, - "sentence_nr": 3 + "score": 0.3960585990192623, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.06883375756323683, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.4079414323089176, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.06225053846006199, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.33576700828174977, + "sentence_nr": 1 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.6230832293767097, - "sentence_nr": 3 + "score": 0.1262345212021199, + "sentence_nr": 1 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.702540870003671, - "sentence_nr": 3 + "score": 0.4305259421555756, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.5357110024227318, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.6365941772753647, - "sentence_nr": 3 + "score": 0.4502540674899478, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.17247941414020762, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.48320144379865687, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.36763082847636347, - "sentence_nr": 3 + "score": 0.09751270821852938, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.45637140510576385, - "sentence_nr": 3 + "score": 0.395617758442078, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.07222350715877336, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.40189924507920805, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.05534265631745826, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.37576775323350164, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.06301432444316532, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.16935976352352106, - "sentence_nr": 3 + "score": 0.4249725532507508, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.31268514922728713, - "sentence_nr": 3 + "score": 0.18248753930464637, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.41990725085948355, - "sentence_nr": 3 + "score": 0.4759830743101189, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.8482942955247808, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.9256238040654331, - "sentence_nr": 3 + "score": 0.1946966569103724, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 + "score": 0.0772718393063023, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.4203683137304257, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 + "score": 0.08291357159799752, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.4009694996956877, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -19569,543 +22494,879 @@ "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.3714280466838255, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.08899053269919978, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.402294028439773, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 + "score": 0.10639077953820089, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.44452351549730684, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.3370100422576744, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.38053031314827857, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3984985732544162, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.3538602132402044, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.11741368553792744, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.4489574295483002, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.35993446555861985, + "sentence_nr": 1 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 + "score": 0.09058017202418268, + "sentence_nr": 1 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.45761691752578293, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.10490867528469339, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.4381872831026742, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.8363600587440573, - "sentence_nr": 3 + "score": 0.167672929900467, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.9912737182609732, - "sentence_nr": 3 + "score": 0.46910779766306765, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", + "score": 0.47435308668900444, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.40661291646126535, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 + "score": 0.07675188745183982, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "microsoft/phi-4", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.39855715351628995, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.6018154975998465, - "sentence_nr": 3 + "score": 0.08351211898903935, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.7669980679050217, - "sentence_nr": 3 + "score": 0.33030812447506436, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.8482942955247808, - "sentence_nr": 3 + "score": 0.07528927678469202, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.9256238040654331, - "sentence_nr": 3 + "score": 0.422513417362817, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.47320724783393625, - "sentence_nr": 3 + "score": 0.06126604215610123, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.5833006006517599, - "sentence_nr": 3 + "score": 0.3837677428398438, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.3556521383601747, - "sentence_nr": 3 + "score": 0.0756907193511249, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.594830811413066, - "sentence_nr": 3 + "score": 0.4138725093679467, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.5406964703993759, - "sentence_nr": 3 + "score": 0.08866637424249016, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.5964595329953364, - "sentence_nr": 3 + "score": 0.44876462229383973, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.2575863752355164, - "sentence_nr": 3 + "score": 0.10574428430204418, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.3717184743596148, - "sentence_nr": 3 + "score": 0.45371814600333005, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.22481074167380632, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.3761108267186685, - "sentence_nr": 3 + "score": 0.439334057339686, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.34589895849033114, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.44792042673107413, - "sentence_nr": 3 + "score": 0.4351068027997583, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.08197539732074254, - "sentence_nr": 3 + "score": 0.09084091756463074, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.2552663483401067, - "sentence_nr": 3 + "score": 0.4286741659142759, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.3032929624979452, - "sentence_nr": 3 + "score": 0.11107006417963905, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.5077415447889289, - "sentence_nr": 3 + "score": 0.4423176833595632, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.3730786950813075, - "sentence_nr": 3 + "score": 0.11107006417963905, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.47401660085208147, - "sentence_nr": 3 + "score": 0.4423176833595632, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.3980589439671235, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-saba", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.38223593598574, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-saba", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.5729676575997464, - "sentence_nr": 3 + "score": 0.42773026235308964, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "mistralai/mistral-nemo", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.4186723698262335, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.4357976697626214, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.07649636372749224, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.4589644893179388, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.12649672885841734, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.41329609863930566, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.1219449069656942, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.4776943038671049, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.09300616718241356, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.380268508362473, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.3692592582677176, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.19328966457045355, - "sentence_nr": 3 + "score": 0.4606409590817001, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.20477156411200437, - "sentence_nr": 3 + "score": 0.07996209785853586, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.3371728179865314, - "sentence_nr": 3 + "score": 0.4279137012019699, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.08939270118279458, - "sentence_nr": 3 + "score": 0.1195053737774238, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.2952752522340665, - "sentence_nr": 3 + "score": 0.4512136289975786, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.21629114799587432, - "sentence_nr": 3 + "score": 0.21748353646757182, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.3542320138389837, - "sentence_nr": 3 + "score": 0.4462746462826943, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.21993356630819796, - "sentence_nr": 3 + "score": 0.24443999371485628, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.3822901360655399, - "sentence_nr": 3 + "score": 0.4991016392840656, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.12212865548711085, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.27604929504751197, - "sentence_nr": 3 + "score": 0.34155562837143877, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.08890843444834641, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.39748122457895574, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.07977475994840084, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.007934677500708292, - "sentence_nr": 3 + "score": 0.41362647590171586, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.19984607356962125, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.29326031481052006, - "sentence_nr": 3 + "score": 0.16764957347186446, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.2291132016494709, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.49134530869146475, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.2291132016494709, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.49134530869146475, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.08939270118279458, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.2952752522340665, - "sentence_nr": 3 + "score": 0.1139393935967296, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.0008893632159373888, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.11020939962740195, + "sentence_nr": 1 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.17729842264695017, - "sentence_nr": 3 + "score": 0.12405768590988119, + "sentence_nr": 1 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.3337514618651578, - "sentence_nr": 3 + "score": 0.40898021337964235, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.17095864413061523, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.2805749649536233, - "sentence_nr": 3 + "score": 0.4077774235667367, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.1701935252826955, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.4455315745640286, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.13952118378975725, - "sentence_nr": 3 + "score": 0.08932983819566953, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.2962794525145751, - "sentence_nr": 3 + "score": 0.412238728569517, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.10358885722770769, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.3618355571813406, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0726047699057895, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.33507193065564506, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -20113,207 +23374,319 @@ "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.028735632183908046, - "sentence_nr": 3 + "score": 0.35698569920901285, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.17670087745185423, - "sentence_nr": 3 + "score": 0.1275291133503835, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.3136010782144669, - "sentence_nr": 3 + "score": 0.4127884601900206, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.580451128369423, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.7246473808162345, - "sentence_nr": 3 + "score": 0.3538966478758119, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.580451128369423, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.728208634600343, - "sentence_nr": 3 + "score": 0.4179644538349004, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.5793367580502561, - "sentence_nr": 3 + "score": 0.11116091368823534, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.6502428441722727, - "sentence_nr": 3 + "score": 0.43307401079748475, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.17662903260733673, - "sentence_nr": 3 + "score": 0.07649978886725356, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.359573626731952, - "sentence_nr": 3 + "score": 0.41031664319131844, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.43422015530849367, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.13502367316243039, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.299859745321103, - "sentence_nr": 3 + "score": 0.3867446668969619, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.27274191069381915, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.37436438971100644, - "sentence_nr": 3 + "score": 0.4331286519146886, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.4168975501651573, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.4173686438800703, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.526589137558171, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.5667866238125795, - "sentence_nr": 3 + "score": 0.2626949949898101, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.07209415755182004, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.34065919239335857, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.058344823927322736, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.30347089460185506, + "sentence_nr": 1 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.6230832293767097, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.702540870003671, - "sentence_nr": 3 + "score": 0.4345822107320723, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.6244631487487835, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.7155411017347171, - "sentence_nr": 3 + "score": 0.38287207799051226, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.3795844422349344, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.6960917409740967, - "sentence_nr": 3 + "score": 0.07012053105310272, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.8209757784637755, - "sentence_nr": 3 + "score": 0.3123716745719453, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.07237963612631294, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.3405017838508768, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.3681425965156976, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -20321,8319 +23694,9599 @@ "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.019516573752972968, - "sentence_nr": 3 + "score": 0.3856352748003268, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.22419056820298167, - "sentence_nr": 3 + "score": 0.0876062628502436, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.3577306040313533, - "sentence_nr": 3 + "score": 0.3978552283854932, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.583526016818016, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.6994652193905146, - "sentence_nr": 3 + "score": 0.43759229210123524, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.27405612859390877, - "sentence_nr": 3 + "score": 0.10505106462290037, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.4639958592456083, - "sentence_nr": 3 + "score": 0.4474870048911137, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.4390960897971484, - "sentence_nr": 3 + "score": 0.11534976570369744, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.541742178821102, - "sentence_nr": 3 + "score": 0.46761329904761845, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.13232291594986312, - "sentence_nr": 3 + "score": 0.0487561532099542, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.301901669683193, - "sentence_nr": 3 + "score": 0.3938156291645021, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.43501677302784214, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.3166144686275811, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.4171828599209745, - "sentence_nr": 3 + "score": 0.4423346652606821, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.2434330428491034, - "sentence_nr": 3 + "score": 0.11378204941109882, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.31858900384957733, - "sentence_nr": 3 + "score": 0.4981472095171313, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.15084825228964133, - "sentence_nr": 3 + "score": 0.09058017202418268, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.3284886849880412, - "sentence_nr": 3 + "score": 0.47296709670964904, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.6230832293767097, - "sentence_nr": 3 + "score": 0.09058017202418268, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.702540870003671, - "sentence_nr": 3 + "score": 0.47296709670964904, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.4452652851854937, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.5889782977654896, - "sentence_nr": 3 + "score": 0.4098374118843212, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-saba", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-saba", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.42966148400813586, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-nemo", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.2677353447271197, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-nemo", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.3932141708916282, - "sentence_nr": 3 + "score": 0.36147677376770687, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.1536690667279411, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.23373462830676886, - "sentence_nr": 3 + "score": 0.4537465621062763, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.16800102974369996, - "sentence_nr": 3 + "score": 0.07409363550896995, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.3319781987745275, - "sentence_nr": 3 + "score": 0.4967565970170675, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", + "model": "google/gemma-3-27b-it", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.6052987576779449, - "sentence_nr": 3 + "score": 0.11335203496873462, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", + "model": "google/gemma-3-27b-it", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.643602170728296, - "sentence_nr": 3 + "score": 0.46528080200591054, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.13004800471424346, - "sentence_nr": 3 + "score": 0.108829546976023, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.28217142159025543, - "sentence_nr": 3 + "score": 0.4177339268402449, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", + "model": "deepseek/deepseek-chat", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.3924259174695316, - "sentence_nr": 3 + "score": 0.10755896992103141, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", + "model": "deepseek/deepseek-chat", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.45050557152077386, - "sentence_nr": 3 + "score": 0.4020332959926386, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", + "model": "microsoft/phi-4", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.10601317434781207, - "sentence_nr": 3 + "score": 0.10561393446836936, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", + "model": "microsoft/phi-4", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.2344095627038401, - "sentence_nr": 3 + "score": 0.36231548741660335, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.248781805015534, - "sentence_nr": 3 + "score": 0.008776218574747889, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.24007528246707907, - "sentence_nr": 3 + "score": 0.09026606980896171, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.31084467045503017, - "sentence_nr": 3 + "score": 0.4602880143145438, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.12769027061800275, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.21844360831325868, - "sentence_nr": 3 + "score": 0.4239838444198129, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.4246183605185108, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.5497460511936695, - "sentence_nr": 3 + "score": 0.0009218289085545725, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.23705266435224473, - "sentence_nr": 3 + "score": 0.14883746844067872, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.3838188339168412, - "sentence_nr": 3 + "score": 0.4476843235219058, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.07999819990926477, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.3889987132692464, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "openai/gpt-4.1-mini", "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.1852972751417938, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "openai/gpt-4.1-mini", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.36660412101424933, - "sentence_nr": 3 + "score": 0.45182850108544576, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "openai/gpt-4.1-nano", "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.06058139791899572, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "openai/gpt-4.1-nano", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.4408463415099943, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", + "model": "openai/gpt-4o-mini", "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.1059352062327485, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", + "model": "openai/gpt-4o-mini", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.2653698485201136, - "sentence_nr": 3 + "score": 0.4291550754056065, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 + "score": 0.15311852363615847, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "score": 0.4119139452120141, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 + "score": 0.15311852363615847, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "score": 0.4119139452120141, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "score": 0.3916082207331212, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "model": "mistralai/mistral-saba", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "model": "mistralai/mistral-saba", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.2246029757863831, - "sentence_nr": 4 + "score": 0.4665993963816433, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "en", + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 + "score": 0.05840817340853527, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "en", + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "score": 0.35342074486617714, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.8003203203844999, - "sentence_nr": 4 + "score": 0.08841818041776058, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.9453478043428296, - "sentence_nr": 4 + "score": 0.47772386173878106, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.8003203203844999, - "sentence_nr": 4 + "score": 0.0678480312173042, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.9453478043428296, - "sentence_nr": 4 + "score": 0.4265201702004798, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 + "score": 0.09171389226334559, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "score": 0.47356776940569145, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.37709297891717664, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.6881502501430368, - "sentence_nr": 4 + "score": 0.4750212573397775, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.12038853897576812, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.47362751515777307, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "model": "microsoft/phi-4", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 + "score": 0.07875418272655998, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "model": "microsoft/phi-4", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "score": 0.38690742073333007, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 + "score": 0.003607064963668313, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "score": 0.09577479457615844, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.8003203203844999, - "sentence_nr": 4 + "score": 0.11161133657801552, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.9453478043428296, - "sentence_nr": 4 + "score": 0.4277891734340718, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.3374178992279451, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.15653859793617866, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.43177798053127925, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.3848892678578171, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.059281546387121374, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.5581982021478125, - "sentence_nr": 4 + "score": 0.31614571419525433, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1550056037604323, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.6364887816884232, - "sentence_nr": 4 + "score": 0.44337732176739364, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.11709058244847056, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.4050787124470676, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation_from", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.15720527174368754, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.47882285385622714, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.09116976904712065, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.40657327142150135, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.09116976904712065, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.40657327142150135, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.2973352934874205, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "model": "mistralai/mistral-saba", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.008495715968558294, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "model": "mistralai/mistral-saba", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.1018469620835675, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "mistralai/mistral-nemo", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "mistralai/mistral-nemo", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.22915930804797763, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.47714463990739786, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.07925388755947431, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.43795381992037963, - "sentence_nr": 4 + "score": 0.492179971192686, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "google/gemma-3-27b-it", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.27560832232663307, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "google/gemma-3-27b-it", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.5881561248602009, - "sentence_nr": 4 + "score": 0.5392779492225674, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.46670957224939175, - "sentence_nr": 4 + "score": 0.42348732385732035, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "model": "deepseek/deepseek-chat", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "model": "deepseek/deepseek-chat", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.37544324742239676, - "sentence_nr": 4 + "score": 0.40349510205992334, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", + "model": "microsoft/phi-4", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.06786993616264396, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", + "model": "microsoft/phi-4", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.5344225462130586, - "sentence_nr": 4 + "score": 0.3421484177024472, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.08302169728235531, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.49546288984677567, - "sentence_nr": 4 + "score": 0.3447730755591614, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.12785320519680665, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.41602211217571683, - "sentence_nr": 4 + "score": 0.46613459917836336, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.18842393723950338, - "sentence_nr": 4 + "score": 0.10106439835419144, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.5646801080937621, - "sentence_nr": 4 + "score": 0.44450926478634867, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.0891537192318598, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.49713060327965375, - "sentence_nr": 4 + "score": 0.3970634926176537, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.07992844954996121, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.440129802760994, - "sentence_nr": 4 + "score": 0.4400081800535333, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.16195570128532405, - "sentence_nr": 4 + "score": 0.09554681544059333, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.581645267684411, - "sentence_nr": 4 + "score": 0.41422485590617925, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10180993216583333, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.429292711066547, - "sentence_nr": 4 + "score": 0.4132076508398995, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.1667955161379731, - "sentence_nr": 4 + "score": 0.08613106833807438, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.5848202846227532, - "sentence_nr": 4 + "score": 0.3975508887703536, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.0982484177591637, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.41213231348812146, - "sentence_nr": 4 + "score": 0.4109236039282987, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.08881356213586242, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.40435987083533204, - "sentence_nr": 4 + "score": 0.43361166925232786, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.07696018952123163, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.34256683873776383, - "sentence_nr": 4 + "score": 0.3952360823843965, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.41477028165511615, - "sentence_nr": 4 + "score": 0.32015903488199987, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "es", + "model": "mistralai/mistral-saba", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.14691658254161208, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "es", + "model": "mistralai/mistral-saba", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.33319754264314433, - "sentence_nr": 4 + "score": 0.41606903643662335, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.03929044884480441, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.40854152133685306, - "sentence_nr": 4 + "score": 0.3378344273327324, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.0758360142139109, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.4164061298971701, - "sentence_nr": 4 + "score": 0.375189380253384, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.07649731878156017, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.40562290854898025, - "sentence_nr": 4 + "score": 0.3813049788572602, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.14483568709851755, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.5709936728721758, - "sentence_nr": 4 + "score": 0.45354858647333196, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10444675051855158, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.40435987083533204, - "sentence_nr": 4 + "score": 0.38166524283468484, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.0598279319535462, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.40562290854898025, - "sentence_nr": 4 + "score": 0.36230972213894785, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.056223651641346066, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.34460908274197133, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.33546955366063214, - "sentence_nr": 4 + "score": 0.021139000776129766, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.11663764605404517, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.40319099863003527, - "sentence_nr": 4 + "score": 0.41241788679161784, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.052359103292999656, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.39618802899930716, - "sentence_nr": 4 + "score": 0.3805982553288677, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.0950136506275681, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.39858613265631837, - "sentence_nr": 4 + "score": 0.4372017487229785, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.0946260953698702, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.3818534926571001, - "sentence_nr": 4 + "score": 0.4353772493110627, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.3644112480028862, - "sentence_nr": 4 + "score": 0.15714515459910894, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.391751004221657, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.3128264071150837, - "sentence_nr": 4 + "score": 0.42253902305100327, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.39618802899930716, - "sentence_nr": 4 + "score": 0.16496711525651045, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.393379300802006, - "sentence_nr": 4 + "score": 0.15581507331519903, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.30325797018747325, - "sentence_nr": 4 + "score": 0.1569550310480722, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.3892064098781075, - "sentence_nr": 4 + "score": 0.11183036824736405, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", + "model": "mistralai/mistral-saba", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", + "model": "mistralai/mistral-saba", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.3066682918799934, - "sentence_nr": 4 + "score": 0.000885582713425434, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.3010381621698183, - "sentence_nr": 4 + "score": 0.0008837044892188052, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.28783297914763095, - "sentence_nr": 4 + "score": 0.45346064531247376, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.07024127582598716, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.3958941272081701, - "sentence_nr": 4 + "score": 0.47294267112081245, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.5042211795038526, - "sentence_nr": 4 + "score": 0.16543976568828428, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.07545713066088315, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.4425973012069069, - "sentence_nr": 4 + "score": 0.35068174137403757, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.511876122662448, - "sentence_nr": 4 + "score": 0.15759802396025455, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "model": "microsoft/phi-4", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "model": "microsoft/phi-4", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.511876122662448, - "sentence_nr": 4 + "score": 0.3556085508212346, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.4418365362317144, - "sentence_nr": 4 + "score": 0.1113696974855524, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.07652593079250605, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.5948724602646328, - "sentence_nr": 4 + "score": 0.3602429629880003, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.5049375875723539, - "sentence_nr": 4 + "score": 0.4404222773455128, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1259356760989446, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.511876122662448, - "sentence_nr": 4 + "score": 0.44568274520971096, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1643146814613677, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.5582360999449585, - "sentence_nr": 4 + "score": 0.5127730105039489, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.48375513642780327, - "sentence_nr": 4 + "score": 0.41972158348095406, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.13550937767032326, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.4915933923809756, - "sentence_nr": 4 + "score": 0.503019099672843, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.41469341972645324, - "sentence_nr": 4 + "score": 0.42638157008001054, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.12506460115047335, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.39451521279220947, - "sentence_nr": 4 + "score": 0.46140175133635725, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.11765201592537665, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.5516607622642397, - "sentence_nr": 4 + "score": 0.4747275270084755, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.11906127329230787, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.47160616105623426, - "sentence_nr": 4 + "score": 0.46393002457493004, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.5256353512715748, - "sentence_nr": 4 + "score": 0.3866869165486058, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "model": "mistralai/mistral-saba", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "model": "mistralai/mistral-saba", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.3765697091436241, - "sentence_nr": 4 + "score": 0.4729772499029102, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.3431841258656284, - "sentence_nr": 4 + "score": 0.0009127418765972983, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.27447938256311044, - "sentence_nr": 4 + "score": 0.13870631856463958, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.615291848344044, - "sentence_nr": 4 + "score": 0.48842698915833077, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.31573558123189943, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.6989238098201116, - "sentence_nr": 4 + "score": 0.4238256552423293, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.33118227522229554, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.7010244056936935, - "sentence_nr": 4 + "score": 0.32485028075459577, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1289863677885349, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.6245952145297528, - "sentence_nr": 4 + "score": 0.42110704132809784, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.44995700110278536, - "sentence_nr": 4 + "score": 0.39349347929480366, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "model": "microsoft/phi-4", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.2680165156355779, - "sentence_nr": 4 + "score": 0.11240015835040616, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "model": "microsoft/phi-4", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.5989264158576341, - "sentence_nr": 4 + "score": 0.37559985568444276, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.09354237835233341, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.4425650919372919, - "sentence_nr": 4 + "score": 0.4409559217991841, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.32594818888335836, - "sentence_nr": 4 + "score": 0.12748506711468208, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.6263180162489238, - "sentence_nr": 4 + "score": 0.4508210683728834, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10833971870416897, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.4576529535952892, - "sentence_nr": 4 + "score": 0.4467303749319595, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.16322494183480127, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.5309982646782259, - "sentence_nr": 4 + "score": 0.4815584993817062, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.08894652425495941, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.4726395749383864, - "sentence_nr": 4 + "score": 0.444906007048383, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1029835796838552, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.40052428191473877, - "sentence_nr": 4 + "score": 0.4404518759673606, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.12969927642858944, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.41766761485960235, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.2630268050449769, - "sentence_nr": 4 + "score": 0.3807183676612817, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10127171102984855, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.33762297226992255, - "sentence_nr": 4 + "score": 0.4525620764847558, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.14055612605808399, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.39336600752225864, - "sentence_nr": 4 + "score": 0.4124924918007278, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.13855644267589232, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.5189004396088754, - "sentence_nr": 4 + "score": 0.402349831140111, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.4101715667811344, - "sentence_nr": 4 + "score": 0.2172505600894225, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", + "model": "mistralai/mistral-saba", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.07402253274163141, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", + "model": "mistralai/mistral-saba", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.39107772695242055, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.45834841871997833, - "sentence_nr": 4 + "score": 0.3225074204652331, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.31754227193241025, - "sentence_nr": 4 + "score": 0.45366883374422223, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.07686127660362363, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.3974726419025883, - "sentence_nr": 4 + "score": 0.3776399755648508, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.24601372576927547, - "sentence_nr": 4 + "score": 0.12826630655689159, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.6374693500772332, - "sentence_nr": 4 + "score": 0.36561922835086, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.1892240568795935, - "sentence_nr": 4 + "score": 0.11993418633737256, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.6151179643430991, - "sentence_nr": 4 + "score": 0.4771538581125459, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.24601372576927547, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.686947433675709, - "sentence_nr": 4 + "score": 0.370339056755171, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "model": "microsoft/phi-4", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.06018687000243035, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "model": "microsoft/phi-4", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.4746119151171374, - "sentence_nr": 4 + "score": 0.3434593497880224, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.195647514979229, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.5832256253964303, - "sentence_nr": 4 + "score": 0.049266699072917926, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.2658483576665877, - "sentence_nr": 4 + "score": 0.10041064691273172, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.6410540990527072, - "sentence_nr": 4 + "score": 0.44193101759002734, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.09463828889338871, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.5639241776831634, - "sentence_nr": 4 + "score": 0.3398200805270262, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.4513165758172509, - "sentence_nr": 4 + "score": 0.0904087252785689, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.7514771576902608, - "sentence_nr": 4 + "score": 0.41830513174690515, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.18237761178381828, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.5281061979991509, - "sentence_nr": 4 + "score": 0.4897620961756989, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.1892240568795935, - "sentence_nr": 4 + "score": 0.13160881951665948, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.6668099404219522, - "sentence_nr": 4 + "score": 0.47196475148373473, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.19652148611100978, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.5554602680850725, - "sentence_nr": 4 + "score": 0.46276687134854994, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.14059082344844442, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.41291750111233794, - "sentence_nr": 4 + "score": 0.45998037393370494, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.17181529671327242, - "sentence_nr": 4 + "score": 0.14163299203710986, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.6053635787005981, - "sentence_nr": 4 + "score": 0.3958314877752854, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.20815166635049767, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.46426595961938383, - "sentence_nr": 4 + "score": 0.45544025525531057, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.20815166635049767, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.41238100267720657, - "sentence_nr": 4 + "score": 0.45544025525531057, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.41238100267720657, - "sentence_nr": 4 + "score": 0.2854365802731815, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "model": "mistralai/mistral-saba", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.12575185526286148, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "model": "mistralai/mistral-saba", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.4806367958084579, - "sentence_nr": 4 + "score": 0.4474760312494353, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.05007800213368231, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.5066311799500233, - "sentence_nr": 4 + "score": 0.3609874593707476, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.12906510917536296, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.6562641136790542, - "sentence_nr": 4 + "score": 0.4316039224423657, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10371256119995112, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.5013632657267051, - "sentence_nr": 4 + "score": 0.4718081522271149, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.49118110181149666, - "sentence_nr": 4 + "score": 0.3339687893627504, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.12408616318856698, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.4224991954993499, - "sentence_nr": 4 + "score": 0.3876257744772486, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.09676840486068537, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.5191362758854317, - "sentence_nr": 4 + "score": 0.4178064649946451, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", + "model": "microsoft/phi-4", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", + "model": "microsoft/phi-4", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.5379068753129642, - "sentence_nr": 4 + "score": 0.31855089109915946, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.3348758882377771, - "sentence_nr": 4 + "score": 0.13661459309404012, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.07105699030509427, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.4601349893675622, - "sentence_nr": 4 + "score": 0.40480969933909144, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.17208141302168437, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.410846945789476, - "sentence_nr": 4 + "score": 0.4542522451167506, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.21351902664706998, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.39909989628767284, - "sentence_nr": 4 + "score": 0.5130443042033361, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10553179283083523, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.39913709020460375, - "sentence_nr": 4 + "score": 0.4283144779172244, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10814706353513916, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.40443357144012176, - "sentence_nr": 4 + "score": 0.4216597036907072, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.11283345429772766, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.42059020279539633, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.11757519503939892, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.4354398635855642, - "sentence_nr": 4 + "score": 0.4440750544699183, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ru", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1200100437012302, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ru", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.5705717737418762, - "sentence_nr": 4 + "score": 0.4636227306109079, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2018735691800057, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.4121946181418776, - "sentence_nr": 4 + "score": 0.48450292773822007, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.17061515620714754, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.39909989628767284, - "sentence_nr": 4 + "score": 0.4576821112653066, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.42422145417131013, - "sentence_nr": 4 + "score": 0.38105951101537255, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", + "model": "mistralai/mistral-saba", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.08941154865546014, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", + "model": "mistralai/mistral-saba", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.39909989628767284, - "sentence_nr": 4 + "score": 0.3829533851946095, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.5606044053771457, - "sentence_nr": 4 + "score": 0.1950507243360035, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.13942317095527404, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.40443357144012176, - "sentence_nr": 4 + "score": 0.45935970393208286, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1266453888967545, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.4121946181418776, - "sentence_nr": 4 + "score": 0.4661468774538111, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.3113878808075066, - "sentence_nr": 4 + "score": 0.1785750235950628, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.6758978744760765, - "sentence_nr": 4 + "score": 0.43344212044851604, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.17181529671327242, - "sentence_nr": 4 + "score": 0.16508680260754793, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.5293474685884572, - "sentence_nr": 4 + "score": 0.4714504120878508, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.23578316044531808, - "sentence_nr": 4 + "score": 0.15735432285765638, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.5821373704411671, - "sentence_nr": 4 + "score": 0.47213324077558755, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", + "model": "microsoft/phi-4", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.14528679532351443, - "sentence_nr": 4 + "score": 0.048498602240297534, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", + "model": "microsoft/phi-4", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.514952316880994, - "sentence_nr": 4 + "score": 0.3379527543934173, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.21972813874997157, - "sentence_nr": 4 + "score": 0.07862112679032317, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.6022094443409847, - "sentence_nr": 4 + "score": 0.4059503829406287, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.3113878808075066, - "sentence_nr": 4 + "score": 0.14400312819313033, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.6728506998168392, - "sentence_nr": 4 + "score": 0.43256046228969486, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.3128496839849598, - "sentence_nr": 4 + "score": 0.38763756150559275, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.5773502691896258, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.7999099314029202, - "sentence_nr": 4 + "score": 0.16269986423611488, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.11960636789197196, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.6371798394308665, - "sentence_nr": 4 + "score": 0.4498565343058379, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.06530397960697328, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.30941048637024005, - "sentence_nr": 4 + "score": 0.36899545840843095, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.14718545479171663, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.6423124418413864, - "sentence_nr": 4 + "score": 0.460257245015624, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.04028607466240258, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.2970314818988727, - "sentence_nr": 4 + "score": 0.3532348375098816, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.06289570792563275, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.3292499962917628, - "sentence_nr": 4 + "score": 0.3813881170279124, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.09920354550190408, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.4410492519530161, - "sentence_nr": 4 + "score": 0.39773404106856114, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10026469692688082, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.4429196299668147, - "sentence_nr": 4 + "score": 0.4029197068884237, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.05364480688581678, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.4425973012069069, - "sentence_nr": 4 + "score": 0.3678203159539189, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", + "model": "mistralai/mistral-saba", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10293517625804853, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", + "model": "mistralai/mistral-saba", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.47465074831919213, - "sentence_nr": 4 + "score": 0.41656326594825205, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "id", + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.05484366713129734, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "id", + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.4425973012069069, - "sentence_nr": 4 + "score": 0.3482099922682012, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10051147382872337, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.4425973012069069, - "sentence_nr": 4 + "score": 0.4554345525110637, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.4425973012069069, - "sentence_nr": 4 + "score": 0.3553498645748245, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.09042147098974282, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.4440931655950853, - "sentence_nr": 4 + "score": 0.45055351363359086, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.15670253601070666, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.47465074831919213, - "sentence_nr": 4 + "score": 0.4663740399508032, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10313151558812104, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.4373156210032521, - "sentence_nr": 4 + "score": 0.40694354343496913, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", + "model": "microsoft/phi-4", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.07012817520173896, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", + "model": "microsoft/phi-4", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.44830378475308, - "sentence_nr": 4 + "score": 0.3697200811525825, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.08269576405332207, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.2623399284064729, - "sentence_nr": 4 + "score": 0.38185156188383546, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "id", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.05014252780397407, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "id", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.4373156210032521, - "sentence_nr": 4 + "score": 0.38032629067357443, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.3844263765000694, - "sentence_nr": 4 + "score": 0.2398247112527542, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.3830425592586042, - "sentence_nr": 4 + "score": 0.06939838145153245, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1384529882948561, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.3805770883173698, - "sentence_nr": 4 + "score": 0.38277790453523536, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1162066330922535, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.3830425592586042, - "sentence_nr": 4 + "score": 0.3781611496143332, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.08720695571682537, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.3539397057594732, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "de", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "de", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.40508457369784023, - "sentence_nr": 4 + "score": 0.3489532010732457, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "de", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.19045679700622437, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "de", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.3830425592586042, - "sentence_nr": 4 + "score": 0.4124342444810736, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.13186515857235506, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.3830425592586042, - "sentence_nr": 4 + "score": 0.4331981792485784, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.13020912407699098, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.3830425592586042, - "sentence_nr": 4 + "score": 0.43037326036891715, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.07454232971572508, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.3844263765000694, - "sentence_nr": 4 + "score": 0.32060982856396664, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", + "model": "mistralai/mistral-saba", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10685362716233777, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", + "model": "mistralai/mistral-saba", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.3830425592586042, - "sentence_nr": 4 + "score": 0.37718436006223416, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10236872486556349, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.3830425592586042, - "sentence_nr": 4 + "score": 0.3655843515716045, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.3830425592586042, - "sentence_nr": 4 + "score": 0.4300304626779191, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.39818525322365445, - "sentence_nr": 4 + "score": 0.3423817260769345, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.23693055763743093, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.6474126202050918, - "sentence_nr": 4 + "score": 0.24120995733605022, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.1667955161379731, - "sentence_nr": 4 + "score": 0.15969495416406884, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.5802683403568892, - "sentence_nr": 4 + "score": 0.4493675427485572, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.1667955161379731, - "sentence_nr": 4 + "score": 0.11882576474873885, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.5802683403568892, - "sentence_nr": 4 + "score": 0.4137309803556429, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", + "model": "microsoft/phi-4", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", + "model": "microsoft/phi-4", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.5521590062829653, - "sentence_nr": 4 + "score": 0.3052529256805565, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.23693055763743093, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.6474126202050918, - "sentence_nr": 4 + "score": 0.21229683306385236, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.1667955161379731, - "sentence_nr": 4 + "score": 0.10500492468363652, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.5802683403568892, - "sentence_nr": 4 + "score": 0.35306452262726606, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.6131017059052001, - "sentence_nr": 4 + "score": 0.3544628606759813, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.23693055763743093, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.6474126202050918, - "sentence_nr": 4 + "score": 0.3371547585108182, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.6131017059052001, - "sentence_nr": 4 + "score": 0.347335662744532, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.35870004213153, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.697021248528644, - "sentence_nr": 4 + "score": 0.3436708646772823, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.07528359366363899, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.6131017059052001, - "sentence_nr": 4 + "score": 0.3929787160946966, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.1423071532720465, - "sentence_nr": 4 + "score": 0.06980884340032019, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.5673078468780355, - "sentence_nr": 4 + "score": 0.4119294666395687, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.19923405658137924, - "sentence_nr": 4 + "score": 0.05275923024775565, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.6211036406023237, - "sentence_nr": 4 + "score": 0.3724723203846839, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.2887138086538547, - "sentence_nr": 5 + "score": 0.09212221823733983, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.6342291345998248, - "sentence_nr": 5 + "score": 0.4009305712746929, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.09212221823733983, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.4009305712746929, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.7013062757071812, - "sentence_nr": 5 + "score": 0.05182797087573874, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.9303769449292738, - "sentence_nr": 5 + "score": 0.3356084649197975, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "model": "mistralai/mistral-saba", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 0.06394979207378956, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "model": "mistralai/mistral-saba", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.2381658499765768, - "sentence_nr": 5 + "score": 0.38558242957188676, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "en", + "model": "mistralai/mistral-nemo", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "en", + "model": "mistralai/mistral-nemo", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.38509368159944835, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.07237989995674375, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.37854508937809583, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.8492326635760689, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.9063898435384111, - "sentence_nr": 5 + "score": 0.3862862497500016, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.08964131615841985, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.3962711438859162, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.8522456714074852, - "sentence_nr": 5 + "score": 0.0842991091574967, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.9096914044088521, - "sentence_nr": 5 + "score": 0.37839472970450666, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.08929303071508352, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.3812590700275906, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "model": "microsoft/phi-4", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "model": "microsoft/phi-4", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.37026342464744205, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.021188116207401797, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.9457416090031758, - "sentence_nr": 5 + "score": 0.059737095980317775, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.9892952933418456, - "sentence_nr": 5 + "score": 0.33742048813623593, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.5087473540251254, - "sentence_nr": 5 + "score": 0.09676230489828269, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.7647955332172516, - "sentence_nr": 5 + "score": 0.43266369498706486, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.5087473540251254, - "sentence_nr": 5 + "score": 0.1691386174483793, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.7647955332172516, - "sentence_nr": 5 + "score": 0.4920789340026317, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.5087473540251254, - "sentence_nr": 5 + "score": 0.26337200877742073, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.7647955332172516, - "sentence_nr": 5 + "score": 0.5332559901749826, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 + "score": 0.12820355595850366, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 + "score": 0.3621325951848801, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.09626574733625733, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.42477685654731595, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 + "score": 0.07980949791887201, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 + "score": 0.41420415271817995, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 + "score": 0.12650809806003369, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 + "score": 0.4579202271851988, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.4234885228074744, - "sentence_nr": 5 + "score": 0.1579087295499776, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.7410180114887145, - "sentence_nr": 5 + "score": 0.45872425820649376, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.47375069012411286, - "sentence_nr": 5 + "score": 0.1579087295499776, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.7543919667018285, - "sentence_nr": 5 + "score": 0.45872425820649376, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 + "score": 0.27031481031630283, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "mistralai/mistral-saba", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "mistralai/mistral-saba", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 + "score": 0.0008494733265375468, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 + "score": 0.06811376279492601, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 + "score": 0.39733701209881217, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.4234885228074744, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.7410180114887145, - "sentence_nr": 5 + "score": 0.4580958606533364, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.47375069012411286, - "sentence_nr": 5 + "score": 0.0764396382727319, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.7543919667018285, - "sentence_nr": 5 + "score": 0.479028094818329, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.48181149445310956, - "sentence_nr": 5 + "score": 0.1348878985611687, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.7675828789334244, - "sentence_nr": 5 + "score": 0.4519788039621858, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.5091224918749461, - "sentence_nr": 5 + "score": 0.11248337299167142, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.7829685247145245, - "sentence_nr": 5 + "score": 0.47408452973613896, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.5091224918749461, - "sentence_nr": 5 + "score": 0.11248337299167142, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.7829685247145245, - "sentence_nr": 5 + "score": 0.47408452973613896, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "model": "microsoft/phi-4", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.6626129614342791, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "model": "microsoft/phi-4", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.8597893117683423, - "sentence_nr": 5 + "score": 0.36955183141564213, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.4441961115027302, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.7565542718609186, - "sentence_nr": 5 + "score": 0.03735667108797313, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 0.15907551162629324, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.2828367156737383, - "sentence_nr": 5 + "score": 0.4524235916096891, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.44543578807748957, - "sentence_nr": 5 + "score": 0.2356661678654945, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.7513336773729535, - "sentence_nr": 5 + "score": 0.5124350706386419, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.48181149445310956, - "sentence_nr": 5 + "score": 0.14944432524273302, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.7675828789334244, - "sentence_nr": 5 + "score": 0.4972796478830659, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.48181149445310956, - "sentence_nr": 5 + "score": 0.2128223810599462, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.7675828789334244, - "sentence_nr": 5 + "score": 0.5048999729332083, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.4625957988586645, - "sentence_nr": 5 + "score": 0.2159761642923436, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.7341375356694393, - "sentence_nr": 5 + "score": 0.5016920563570307, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.5461499540157965, - "sentence_nr": 5 + "score": 0.2025321568231461, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.7954823723658209, - "sentence_nr": 5 + "score": 0.48513287721314796, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.2404315522172745, - "sentence_nr": 5 + "score": 0.24158268888989357, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.49155714102395526, - "sentence_nr": 5 + "score": 0.5410427858408839, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.3477250470582593, - "sentence_nr": 5 + "score": 0.19135523280427486, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.7188419868243952, - "sentence_nr": 5 + "score": 0.49947805136320467, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.404727200247809, - "sentence_nr": 5 + "score": 0.2128223810599462, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.6681898017773897, - "sentence_nr": 5 + "score": 0.5048999729332083, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.40276720463657734, - "sentence_nr": 5 + "score": 0.2128223810599462, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.6529271690805427, - "sentence_nr": 5 + "score": 0.5048999729332083, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.404727200247809, - "sentence_nr": 5 + "score": 0.20689377284100188, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.6681898017773897, - "sentence_nr": 5 + "score": 0.499181874773421, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "model": "mistralai/mistral-saba", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.404727200247809, - "sentence_nr": 5 + "score": 0.2128223810599462, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "model": "mistralai/mistral-saba", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.6681898017773897, - "sentence_nr": 5 + "score": 0.5048999729332083, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "es", + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.44897710722021167, - "sentence_nr": 5 + "score": 0.2040063460127264, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "es", + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.6862249089515978, - "sentence_nr": 5 + "score": 0.49119437313905046, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.44897710722021167, - "sentence_nr": 5 + "score": 0.2192257678937897, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.6862249089515978, - "sentence_nr": 5 + "score": 0.4939069969762876, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.404727200247809, - "sentence_nr": 5 + "score": 0.19989672437010242, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.6392900613840917, - "sentence_nr": 5 + "score": 0.4954677323302961, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.44897710722021167, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.6862249089515978, - "sentence_nr": 5 + "score": 0.4522625000672462, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.44897710722021167, - "sentence_nr": 5 + "score": 0.231679638185573, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.6862249089515978, - "sentence_nr": 5 + "score": 0.5128772172047342, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.404727200247809, - "sentence_nr": 5 + "score": 0.15455087483252075, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.6392900613840917, - "sentence_nr": 5 + "score": 0.4692608507135787, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "model": "microsoft/phi-4", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.44897710722021167, - "sentence_nr": 5 + "score": 0.21119143803941876, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "model": "microsoft/phi-4", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.6862249089515978, - "sentence_nr": 5 + "score": 0.5030156541243803, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 0.2128223810599462, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.4386229919587297, - "sentence_nr": 5 + "score": 0.5048999729332083, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.44897710722021167, - "sentence_nr": 5 + "score": 0.11469202308276233, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.6862249089515978, - "sentence_nr": 5 + "score": 0.47736356509677796, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.5379348324975908, - "sentence_nr": 5 + "score": 0.10720391954020723, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.7703766110349561, - "sentence_nr": 5 + "score": 0.37219605281253065, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.30188353873287377, - "sentence_nr": 5 + "score": 0.09793316925795417, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.6086565367747951, - "sentence_nr": 5 + "score": 0.4297577431879659, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.22391522968021457, - "sentence_nr": 5 + "score": 0.10461993210426317, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.6087618281135659, - "sentence_nr": 5 + "score": 0.411539574168363, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.2704091953828695, - "sentence_nr": 5 + "score": 0.09232875412879928, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.6207272323003366, - "sentence_nr": 5 + "score": 0.40574729737089493, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.2704091953828695, - "sentence_nr": 5 + "score": 0.06106711519028805, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.6207272323003366, - "sentence_nr": 5 + "score": 0.35756654876490823, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.2704091953828695, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.6207272323003366, - "sentence_nr": 5 + "score": 0.379821157654799, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.2704091953828695, - "sentence_nr": 5 + "score": 0.10588612806056373, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.6207272323003366, - "sentence_nr": 5 + "score": 0.4068718481729766, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.2704091953828695, - "sentence_nr": 5 + "score": 0.10349275234623093, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.6207272323003366, - "sentence_nr": 5 + "score": 0.39646229864034915, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.2704091953828695, - "sentence_nr": 5 + "score": 0.10078272841498563, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.6207272323003366, - "sentence_nr": 5 + "score": 0.4064722660884474, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.2704091953828695, - "sentence_nr": 5 + "score": 0.054115495307563, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.6207272323003366, - "sentence_nr": 5 + "score": 0.3261514049985403, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "model": "mistralai/mistral-saba", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.2704091953828695, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "model": "mistralai/mistral-saba", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.6207272323003366, - "sentence_nr": 5 + "score": 0.38160462422538616, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 0.09769679999383253, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.4621757041594117, - "sentence_nr": 5 + "score": 0.34898418602745346, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.22067731046885494, - "sentence_nr": 5 + "score": 0.12044433900881439, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.5635661737033422, - "sentence_nr": 5 + "score": 0.4241018127470702, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.5091224918749461, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.7829685247145245, - "sentence_nr": 5 + "score": 0.33260316469843504, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.6026286934891149, - "sentence_nr": 5 + "score": 0.06986280403265237, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.8025775976044891, - "sentence_nr": 5 + "score": 0.35513587606181224, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.6626129614342791, - "sentence_nr": 5 + "score": 0.11511385959745848, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.8597893117683423, - "sentence_nr": 5 + "score": 0.43904137765866535, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.6626129614342791, - "sentence_nr": 5 + "score": 0.055494510332021456, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.8597893117683423, - "sentence_nr": 5 + "score": 0.3537313942209114, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", + "model": "microsoft/phi-4", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.4727805712999679, - "sentence_nr": 5 + "score": 0.04637573483688681, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", + "model": "microsoft/phi-4", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.7717158158167359, - "sentence_nr": 5 + "score": 0.34947742719726066, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.48181149445310956, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.7675828789334244, - "sentence_nr": 5 + "score": 0.24802563498261762, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 + "score": 0.15017237887090715, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.7689532399280165, - "sentence_nr": 5 + "score": 0.4481403580546466, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.5465526716276092, - "sentence_nr": 5 + "score": 0.06851723496815999, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.8012679276648627, - "sentence_nr": 5 + "score": 0.40911149660575097, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.6917901740466924, - "sentence_nr": 5 + "score": 0.14184998906630783, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.8479928839177578, - "sentence_nr": 5 + "score": 0.44498159653494584, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.4625957988586645, - "sentence_nr": 5 + "score": 0.098684745093626, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.7338978299765546, - "sentence_nr": 5 + "score": 0.4007829842063641, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.5461499540157965, - "sentence_nr": 5 + "score": 0.12459539355692184, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.7954823723658209, - "sentence_nr": 5 + "score": 0.4660673682008178, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 5 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.24011079455637607, - "sentence_nr": 5 + "score": 0.43650150602378024, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.19920494035049138, - "sentence_nr": 5 + "score": 0.16852726931996573, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.614209720001149, - "sentence_nr": 5 + "score": 0.47700454282460186, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.5896613549548209, - "sentence_nr": 5 + "score": 0.0967458811247473, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.7528914749586836, - "sentence_nr": 5 + "score": 0.4485783191522753, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.4596980088392874, - "sentence_nr": 5 + "score": 0.14259457481008844, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.713787745993602, - "sentence_nr": 5 + "score": 0.44333951056770365, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.5300714512917181, - "sentence_nr": 5 + "score": 0.12929801979373398, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.7461630750708693, - "sentence_nr": 5 + "score": 0.4481163949580147, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.4596980088392874, - "sentence_nr": 5 + "score": 0.05089649055811939, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.713787745993602, - "sentence_nr": 5 + "score": 0.332316583093035, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", + "model": "mistralai/mistral-saba", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.43007078231141604, - "sentence_nr": 5 + "score": 0.1215444014819075, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", + "model": "mistralai/mistral-saba", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.6680243275130087, - "sentence_nr": 5 + "score": 0.47810748222746724, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "model": "mistralai/mistral-nemo", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.4596980088392874, - "sentence_nr": 5 + "score": 0.07454524508730044, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "model": "mistralai/mistral-nemo", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.713787745993602, - "sentence_nr": 5 + "score": 0.4226523751275279, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.33359103227594633, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.701102363286568, - "sentence_nr": 5 + "score": 0.45094066627298346, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.5896613549548209, - "sentence_nr": 5 + "score": 0.12140604678816087, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.7528914749586836, - "sentence_nr": 5 + "score": 0.4867949101127361, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.5271017464925504, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.7749613594649343, - "sentence_nr": 5 + "score": 0.4257191895245898, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 5 + "score": 0.4447505575009147, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "model": "deepseek/deepseek-chat", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.4596980088392874, - "sentence_nr": 5 + "score": 0.06790750885658495, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "model": "deepseek/deepseek-chat", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.713787745993602, - "sentence_nr": 5 + "score": 0.36461852531844696, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.06580605405558394, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.39253122769812004, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.4335364472118335, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.6878319610579101, - "sentence_nr": 5 + "score": 0.34130342683703757, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.480771131185851, - "sentence_nr": 5 + "score": 0.09309060319054646, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.7032048786770096, - "sentence_nr": 5 + "score": 0.4298405489249517, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.4801289744823913, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.6766690087429765, - "sentence_nr": 5 + "score": 0.36741937011390374, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.3272712268138726, - "sentence_nr": 5 + "score": 0.11220450894323894, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.6272846474183881, - "sentence_nr": 5 + "score": 0.46129962837218175, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.3272712268138726, - "sentence_nr": 5 + "score": 0.1495364895327543, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.6272846474183881, - "sentence_nr": 5 + "score": 0.47363863746713725, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.30421485886156485, - "sentence_nr": 5 + "score": 0.09996446612314541, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.566236392445952, - "sentence_nr": 5 + "score": 0.38170822021413087, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.32965129549221617, - "sentence_nr": 5 + "score": 0.06387054393436563, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.623436907204599, - "sentence_nr": 5 + "score": 0.3815759231072373, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 0.10226898445462904, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.24706467963183681, - "sentence_nr": 5 + "score": 0.31602168772086425, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.32965129549221617, - "sentence_nr": 5 + "score": 0.13308739447486365, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.623436907204599, - "sentence_nr": 5 + "score": 0.3932447622969156, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.32078739729528816, - "sentence_nr": 5 + "score": 0.15945111742363355, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.5817366082116868, - "sentence_nr": 5 + "score": 0.3874937023246531, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.32078739729528816, - "sentence_nr": 5 + "score": 0.16222984538693397, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.5817366082116868, - "sentence_nr": 5 + "score": 0.3853884541190584, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.3231203125477008, - "sentence_nr": 5 + "score": 0.04590350535783419, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.5812275690118908, - "sentence_nr": 5 + "score": 0.3590907949514895, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "model": "mistralai/mistral-saba", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.3231203125477008, - "sentence_nr": 5 + "score": 0.06553112247619124, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "model": "mistralai/mistral-saba", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.5812275690118908, - "sentence_nr": 5 + "score": 0.34736601570005654, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 5 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.44332438338421004, - "sentence_nr": 5 + "score": 0.28746043870472343, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.3231203125477008, - "sentence_nr": 5 + "score": 0.08090780486139712, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.5812275690118908, - "sentence_nr": 5 + "score": 0.39801116539473197, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.28489318277723963, - "sentence_nr": 5 + "score": 0.13634149384842567, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.5764325110247531, - "sentence_nr": 5 + "score": 0.45660196324556507, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.2981792160679168, - "sentence_nr": 5 + "score": 0.10539949748136965, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.5788026000794341, - "sentence_nr": 5 + "score": 0.43372525820935726, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.3942058093215873, - "sentence_nr": 5 + "score": 0.08313078148769443, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.5878575558111695, - "sentence_nr": 5 + "score": 0.37751928853187794, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.2981792160679168, - "sentence_nr": 5 + "score": 0.11705228389611827, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.5788026000794341, - "sentence_nr": 5 + "score": 0.3876750316897958, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", + "model": "microsoft/phi-4", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.3665134361137304, - "sentence_nr": 5 + "score": 0.06945941539174287, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", + "model": "microsoft/phi-4", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.6118771029352303, - "sentence_nr": 5 + "score": 0.37532824377184637, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.3665134361137304, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.6118771029352303, - "sentence_nr": 5 + "score": 0.0017825311942959, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.3485799122645514, - "sentence_nr": 5 + "score": 0.060825626903085836, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.6090575371936678, - "sentence_nr": 5 + "score": 0.367772007695087, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.4085639059221913, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.5887623870312143, - "sentence_nr": 5 + "score": 0.3275292968031138, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.3485799122645514, - "sentence_nr": 5 + "score": 0.08932983819566953, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.6090575371936678, - "sentence_nr": 5 + "score": 0.37462132890676997, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 0.12151683896637884, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.06088829927112382, - "sentence_nr": 5 + "score": 0.3874631848880938, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.4100134571476398, - "sentence_nr": 5 + "score": 0.10204941450542204, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.5856608401367807, - "sentence_nr": 5 + "score": 0.3642762802151916, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.17098323692758396, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.5216877937894046, - "sentence_nr": 5 + "score": 0.4198632525977369, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.3527295712700594, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.6062826429226292, - "sentence_nr": 5 + "score": 0.40030811252315135, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.6471892368478446, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.8142499721936278, - "sentence_nr": 5 + "score": 0.3645369664653625, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.7012294787544179, - "sentence_nr": 5 + "score": 0.07197294819814848, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.8478115719875968, - "sentence_nr": 5 + "score": 0.32449572804662435, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.6917901740466924, - "sentence_nr": 5 + "score": 0.0888682354086992, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.8479928839177578, - "sentence_nr": 5 + "score": 0.339535780138334, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.40202477345336673, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.7469480084357536, - "sentence_nr": 5 + "score": 0.2954316287998063, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", + "model": "mistralai/mistral-saba", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.42612283570374254, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", + "model": "mistralai/mistral-saba", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.7185121839177114, - "sentence_nr": 5 + "score": 0.13508587062654778, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 5 + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.2799331151961311, - "sentence_nr": 5 + "score": 0.20625143302701235, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.40157733283424196, - "sentence_nr": 5 + "score": 0.10744704576329998, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.7133166401137868, - "sentence_nr": 5 + "score": 0.4113914052054571, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.6917901740466924, - "sentence_nr": 5 + "score": 0.061599223508298384, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.8479928839177578, - "sentence_nr": 5 + "score": 0.39071299620062516, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.4625957988586645, - "sentence_nr": 5 + "score": 0.08121271060180286, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.7494665344743727, - "sentence_nr": 5 + "score": 0.38347124715279823, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.42612283570374254, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.7185121839177114, - "sentence_nr": 5 + "score": 0.346134163535414, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.6917901740466924, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.8479928839177578, - "sentence_nr": 5 + "score": 0.3120575186023678, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.12703931399200974, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.36503713996419634, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.17729842264695017, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.5199388279318895, - "sentence_nr": 5 + "score": 0.25122591039975606, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.23141570376732995, - "sentence_nr": 5 + "score": 0.142144689462689, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.5938624587877649, - "sentence_nr": 5 + "score": 0.4331731101712559, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.23713320246552005, - "sentence_nr": 5 + "score": 0.1383193561213217, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.6106842970161642, - "sentence_nr": 5 + "score": 0.4229717720106369, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.21690365808279138, - "sentence_nr": 5 + "score": 0.14846392828893068, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.5384773678665918, - "sentence_nr": 5 + "score": 0.44939103256256696, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.23114663823833642, - "sentence_nr": 5 + "score": 0.12146424147064877, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.5786592584609213, - "sentence_nr": 5 + "score": 0.4236242053572171, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.22128776529156546, - "sentence_nr": 5 + "score": 0.1094074965643601, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.5609439249510223, - "sentence_nr": 5 + "score": 0.40564547968508147, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.15220822456235364, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.4115596298004114, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.21690365808279138, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.5502221839528678, - "sentence_nr": 5 + "score": 0.37988778643556725, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ru", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.23114663823833642, - "sentence_nr": 5 + "score": 0.15453746478246141, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ru", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.5786592584609213, - "sentence_nr": 5 + "score": 0.4413516563123831, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.1998573974138024, - "sentence_nr": 5 + "score": 0.057437241172269976, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.540043957078071, - "sentence_nr": 5 + "score": 0.35678099544211456, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.3282518529729176, - "sentence_nr": 5 + "score": 0.054740285583782515, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.6453010665294326, - "sentence_nr": 5 + "score": 0.3326088942846013, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.30752616970214336, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.6051452460471443, - "sentence_nr": 5 + "score": 0.21262860902391906, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", + "model": "mistralai/mistral-saba", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", + "model": "mistralai/mistral-saba", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 5 + "score": 0.145714476532026, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", + "model": "mistralai/mistral-nemo", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.310441435588881, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", + "model": "mistralai/mistral-nemo", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.6413164971104282, - "sentence_nr": 5 + "score": 0.09959979931751661, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.23114663823833642, - "sentence_nr": 5 + "score": 0.11584671534637218, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.5814841210741494, - "sentence_nr": 5 + "score": 0.43405442508386655, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.22656720908801994, - "sentence_nr": 5 + "score": 0.10824174794790033, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.5465750236858569, - "sentence_nr": 5 + "score": 0.4249858140828141, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", + "model": "google/gemma-3-27b-it", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.6888365053466561, - "sentence_nr": 5 + "score": 0.13364464646895982, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", + "model": "google/gemma-3-27b-it", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.8656273480576243, - "sentence_nr": 5 + "score": 0.420485716773103, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.25711386542134795, - "sentence_nr": 5 + "score": 0.10642944544652122, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.6088853751738869, - "sentence_nr": 5 + "score": 0.4272539643561774, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.25711386542134795, - "sentence_nr": 5 + "score": 0.09994208395394132, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.6088853751738869, - "sentence_nr": 5 + "score": 0.4116031651908585, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", + "model": "microsoft/phi-4", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.3416581331218724, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", + "model": "microsoft/phi-4", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.6578570934289981, - "sentence_nr": 5 + "score": 0.30779033808655853, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.24456656109396324, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.629934465484704, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", + "model": "amazon/nova-micro-v1", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.3060368950930089, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", + "model": "amazon/nova-micro-v1", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.6736142284622013, - "sentence_nr": 5 + "score": 0.11665236403515139, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.3423591961656694, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.6570214418399444, - "sentence_nr": 5 + "score": 0.3539070801331386, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.6888365053466561, - "sentence_nr": 5 + "score": 0.11340129142744679, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.8656273480576243, - "sentence_nr": 5 + "score": 0.4168800407013454, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.24456656109396324, - "sentence_nr": 5 + "score": 0.07438681343481453, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.629934465484704, - "sentence_nr": 5 + "score": 0.3894532190798538, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 5 + "score": 0.12289012856297825, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 5 + "score": 0.4301477375362509, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.3060368950930089, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.6736142284622013, - "sentence_nr": 5 + "score": 0.42269142120170566, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 5 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.46965980060137014, - "sentence_nr": 5 + "score": 0.4070049032521835, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.24456656109396324, - "sentence_nr": 5 + "score": 0.10713148568717314, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.629934465484704, - "sentence_nr": 5 + "score": 0.41522111700393083, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.5069487414732323, - "sentence_nr": 5 + "score": 0.09117705217713716, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.7801245319017357, - "sentence_nr": 5 + "score": 0.43706480660762204, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.5695988432761473, - "sentence_nr": 5 + "score": 0.09117705217713716, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.7516103467926585, - "sentence_nr": 5 + "score": 0.43706480660762204, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.6358974376699329, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.736661937085844, - "sentence_nr": 5 + "score": 0.1585163492096374, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", + "model": "mistralai/mistral-saba", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.5695988432761473, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", + "model": "mistralai/mistral-saba", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.7516103467926585, - "sentence_nr": 5 + "score": 0.3944339442252199, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "id", + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.45307778036928104, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "id", + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.6935397252637394, - "sentence_nr": 5 + "score": 0.29010329480545366, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.45307778036928104, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.6935397252637394, - "sentence_nr": 5 + "score": 0.41628264791417435, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.5695988432761473, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.7516103467926585, - "sentence_nr": 5 + "score": 0.402380136220731, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.8492326635760689, - "sentence_nr": 5 + "score": 0.11765941642483725, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.9027320255916917, - "sentence_nr": 5 + "score": 0.44984502263523063, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.8522456714074852, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.9096914044088521, - "sentence_nr": 5 + "score": 0.3814511364616612, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.45307778036928104, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.6935397252637394, - "sentence_nr": 5 + "score": 0.3783361329472209, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", + "model": "microsoft/phi-4", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.8492326635760689, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", + "model": "microsoft/phi-4", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.9027320255916917, - "sentence_nr": 5 + "score": 0.31731158276047144, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.30614023358320086, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.5870676308171808, - "sentence_nr": 5 + "score": 0.2862195367689212, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "id", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.2281399713503153, - "sentence_nr": 5 + "score": 0.08824413655138029, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "id", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.6211104268881504, - "sentence_nr": 5 + "score": 0.3977541835364748, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.7196315267102845, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.8835331636515565, - "sentence_nr": 5 + "score": 0.3241317524160092, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.5072784644062104, - "sentence_nr": 5 + "score": 0.07351652222518425, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.7361065921505279, - "sentence_nr": 5 + "score": 0.3862617013651048, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.5072784644062104, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.7361065921505279, - "sentence_nr": 5 + "score": 0.38224927613981324, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.5072784644062104, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.7361065921505279, - "sentence_nr": 5 + "score": 0.37754295227618245, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.09789100211394652, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.3998935506194311, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "de", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.5072784644062104, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "de", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.7361065921505279, - "sentence_nr": 5 + "score": 0.3292309575174233, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "de", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.5072784644062104, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "de", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.7361065921505279, - "sentence_nr": 5 + "score": 0.3702987017023586, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.15576268293490025, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.15559260728522314, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.5072784644062104, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.7361065921505279, - "sentence_nr": 5 + "score": 0.17228284869075539, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.0008931761343336905, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.09318751734659178, + "sentence_nr": 1 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.7196315267102845, - "sentence_nr": 5 + "score": 0.07105722763151533, + "sentence_nr": 1 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.8835331636515565, - "sentence_nr": 5 + "score": 0.3833002336205578, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.7196315267102845, - "sentence_nr": 5 + "score": 0.06699965311966219, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.8835331636515565, - "sentence_nr": 5 + "score": 0.39535625904035426, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.6004981752197522, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.7667541011433795, - "sentence_nr": 5 + "score": 0.2594621783720232, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.7196315267102845, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.8835331636515565, - "sentence_nr": 5 + "score": 0.37756676543137707, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.26779197752407163, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.2821488741696276, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.480771131185851, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.705252762035012, - "sentence_nr": 5 + "score": 0.19258818005939538, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "de", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.445107576642247, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "de", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.6955301378913092, - "sentence_nr": 5 + "score": 0.3162255423673242, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.40157733283424196, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.6532350818978572, - "sentence_nr": 5 + "score": 0.3499024158832446, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.38091370416670794, - "sentence_nr": 5 + "score": 0.12587301409115934, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.6438225861756911, - "sentence_nr": 5 + "score": 0.43278573034203477, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.31374450602681464, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.6422405832556486, - "sentence_nr": 5 + "score": 0.08072859763900794, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.4924584878270648, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.7062510642584722, - "sentence_nr": 5 + "score": 0.3683463348059566, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.07801862165263017, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.3649626948604655, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.1624355752882384, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.5090396683756193, - "sentence_nr": 5 + "score": 0.31673355623145183, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.30752616970214336, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.5976254557718147, - "sentence_nr": 5 + "score": 0.30718853768673293, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.08704115681414555, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.30654218553615703, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.1218935493102045, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.3464538940382064, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.36227557436010244, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.6470050797908481, - "sentence_nr": 5 + "score": 0.16140904075314855, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.11038950880611041, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.08923623051454435, + "sentence_nr": 1 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.45506803308128024, - "sentence_nr": 5 + "score": 0.1623104334751586, + "sentence_nr": 1 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.7117510256855165, - "sentence_nr": 5 + "score": 0.4771296526729911, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.419468515826214, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.6664000694648706, - "sentence_nr": 5 + "score": 0.3947523667337686, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.010321080079207262, - "sentence_nr": 5 + "score": 0.09092206673648158, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.06492787287290114, - "sentence_nr": 5 + "score": 0.4200146131210127, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.4938015541936678, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.7820348786317745, - "sentence_nr": 5 + "score": 0.3701108638788564, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.06534705417606954, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.36650330882957155, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.10505472012980112, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.12858902882463452, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.35477908164501704, - "sentence_nr": 5 + "score": 0.10823255315380634, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.3942058093215873, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.6316031412228033, - "sentence_nr": 5 + "score": 0.10849792605943348, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.3961285597009415, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.6148751441350505, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.4923751299732868, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.6853756490381199, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.3996712647649035, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.6353525755760105, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 + "score": 0.5115346945020283, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.3263040636562357, - "sentence_nr": 6 + "score": 0.7037574715738644, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.539613476474223, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.7656669576227642, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.44323526910431466, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.7203175886481126, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.38870674200492367, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.6484380084879691, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.23904922011090457, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.4803366376640879, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.23904922011090457, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.4803366376640879, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.24715873794308874, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.49051792813181655, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.24165618759503896, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.4975964813831709, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.23904922011090457, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.3399292774084129, + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.005449161724399305, + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.026158029267484995, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.005449161724399305, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.026158029267484995, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.24508104771894088, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.5725552336126134, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.33608213382072566, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.6155314069125684, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.5091686102291805, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.7212096271583858, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.059225812220444374, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.3415915215786091, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.20801258614305904, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.26703508536995574, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.35315040956049437, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.625895188503691, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.3390387389794623, - "sentence_nr": 6 + "score": 0.17374951565433233, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.6170420596680538, - "sentence_nr": 6 + "score": 0.45325597884524305, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.3142665434344143, - "sentence_nr": 6 + "score": 0.17743299460161885, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.6466526067220029, - "sentence_nr": 6 + "score": 0.43071271897416463, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.3751840463233443, - "sentence_nr": 6 + "score": 0.16052654068024738, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.6279894552667558, - "sentence_nr": 6 + "score": 0.41580120868053494, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.19268479640608693, - "sentence_nr": 6 + "score": 0.05963579607071745, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.551397074868541, - "sentence_nr": 6 + "score": 0.31139762378406344, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.19464521962073492, - "sentence_nr": 6 + "score": 0.16882878525950934, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.5838790966762375, - "sentence_nr": 6 + "score": 0.4916588179493449, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.19464521962073492, - "sentence_nr": 6 + "score": 0.06724888422961112, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.5838790966762375, - "sentence_nr": 6 + "score": 0.39667480605700844, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.17470942957770763, - "sentence_nr": 6 + "score": 0.11133996756497437, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.5403400891349619, - "sentence_nr": 6 + "score": 0.4410280353998367, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.3060368950930089, - "sentence_nr": 6 + "score": 0.11930191477839873, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.6498981440676681, - "sentence_nr": 6 + "score": 0.38127647898994615, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.19464521962073492, - "sentence_nr": 6 + "score": 0.11930191477839873, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.5763410052067085, - "sentence_nr": 6 + "score": 0.38127647898994615, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 + "score": 0.11346446511593337, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.5460240376042262, - "sentence_nr": 6 + "score": 0.3675317022605926, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-saba", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.19464521962073492, - "sentence_nr": 6 + "score": 0.1138130543686042, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-saba", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.5838790966762375, - "sentence_nr": 6 + "score": 0.37129771776482284, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.24343304284910333, - "sentence_nr": 6 + "score": 0.09880177230676102, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.6275577931282961, - "sentence_nr": 6 + "score": 0.3297638349619511, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.28571962561926445, - "sentence_nr": 6 + "score": 0.2377604053257556, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.5690390533910819, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.22573408807826306, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.5444672928195973, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.10742716472890976, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.42694859148910824, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.19073363590503933, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.49895382941569383, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.2382348118072364, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.5209651952837622, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.04378394322066971, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.3208878744579877, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.14745870033404418, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.6431872581462166, - "sentence_nr": 6 + "score": 0.475170637938921, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.5014756677893482, - "sentence_nr": 6 + "score": 0.26801022984888695, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.7958858211784339, - "sentence_nr": 6 + "score": 0.5654883864995515, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.6255340042200862, - "sentence_nr": 6 + "score": 0.21665407194210906, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.8724783049357475, - "sentence_nr": 6 + "score": 0.4344921442639243, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.5014756677893482, - "sentence_nr": 6 + "score": 0.2735429726790281, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.7958858211784339, - "sentence_nr": 6 + "score": 0.5644723203818537, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.3083012995502152, - "sentence_nr": 6 + "score": 0.20223322445648179, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.6589376390020449, - "sentence_nr": 6 + "score": 0.5084057058209687, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.2783519250162903, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5348467461069981, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.43021236941942204, - "sentence_nr": 6 + "score": 0.2868985878682555, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.7142896582178452, - "sentence_nr": 6 + "score": 0.591501744009396, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.4216890913810254, - "sentence_nr": 6 + "score": 0.26459538953931094, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.6885217194158456, - "sentence_nr": 6 + "score": 0.5272178908335121, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.21339518760946108, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.48087345520828384, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.21339518760946108, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.48087345520828384, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.3083012995502152, - "sentence_nr": 6 + "score": 0.012201453805310429, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.6589376390020449, - "sentence_nr": 6 + "score": 0.063050817196087, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.13144312778782385, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.39885548494708645, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.17546787062208544, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.26318271943814925, + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.6255340042200862, - "sentence_nr": 6 + "score": 0.24342570806900707, + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.8724783049357475, - "sentence_nr": 6 + "score": 0.5341782261409304, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.29176300840900793, - "sentence_nr": 6 + "score": 0.2941675978352054, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.6143650111703199, - "sentence_nr": 6 + "score": 0.5906648119376218, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.43021236941942204, - "sentence_nr": 6 + "score": 0.25848476545940924, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.7142896582178452, - "sentence_nr": 6 + "score": 0.5525933856866961, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.5014756677893482, - "sentence_nr": 6 + "score": 0.24071298960902482, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.7958858211784339, - "sentence_nr": 6 + "score": 0.5438509851618877, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.30181468526956173, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5742266289862643, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.1524391967594511, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.40401082182864007, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.24090844358935917, - "sentence_nr": 6 + "score": 0.2063890416514164, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.5468852870478801, - "sentence_nr": 6 + "score": 0.38567678850872256, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.6255340042200862, - "sentence_nr": 6 + "score": 0.3295566054952435, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.8724783049357475, - "sentence_nr": 6 + "score": 0.5816133441895466, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.48994561421713123, - "sentence_nr": 6 + "score": 0.3574583793293068, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.7411155087367244, - "sentence_nr": 6 + "score": 0.5924115119819969, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.48994561421713123, - "sentence_nr": 6 + "score": 0.37994652561206577, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.7411155087367244, - "sentence_nr": 6 + "score": 0.6464467277069994, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.32365795029773287, - "sentence_nr": 6 + "score": 0.4206507730319955, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.6509517796070665, - "sentence_nr": 6 + "score": 0.678851303587664, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.32365795029773287, - "sentence_nr": 6 + "score": 0.35367180741660353, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.6509517796070665, - "sentence_nr": 6 + "score": 0.6344846206551544, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.4583059140964007, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6572591475957218, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.4481489512240194, - "sentence_nr": 6 + "score": 0.3428955163829333, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.7745649676018984, - "sentence_nr": 6 + "score": 0.6023036718160529, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.32365795029773287, - "sentence_nr": 6 + "score": 0.3563758622144919, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.6509517796070665, - "sentence_nr": 6 + "score": 0.6037023613177924, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.23476486638430955, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.4773200602980358, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.24071298960902482, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.48246317686203916, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.32365795029773287, - "sentence_nr": 6 + "score": 0.2632018059331281, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.6509517796070665, - "sentence_nr": 6 + "score": 0.501302719796297, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.21190235548035158, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.42394616175486477, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.2026004770366011, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.26423618098743384, + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.32365795029773287, - "sentence_nr": 6 + "score": 0.4538010737057216, + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.6509517796070665, - "sentence_nr": 6 + "score": 0.6446834621229663, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 + "score": 0.4537725564472931, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 + "score": 0.6400294742979378, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.32365795029773287, - "sentence_nr": 6 + "score": 0.20586736678432452, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.6509517796070665, - "sentence_nr": 6 + "score": 0.5693016623172978, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.48994561421713123, - "sentence_nr": 6 + "score": 0.1455399826828606, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.7411155087367244, - "sentence_nr": 6 + "score": 0.4504825146558032, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.4537725564472931, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6472259254275322, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.3224480215988163, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.5927918264487643, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.32365795029773287, - "sentence_nr": 6 + "score": 0.175866555062937, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.6509517796070665, - "sentence_nr": 6 + "score": 0.2758687846643748, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.32365795029773287, - "sentence_nr": 6 + "score": 0.2948978498692003, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.6509517796070665, - "sentence_nr": 6 + "score": 0.5529453973837751, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 + "score": 0.22292726306270316, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 + "score": 0.5653789747970112, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 + "score": 0.09362261118571368, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 + "score": 0.3452056942265759, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 + "score": 0.26930937054323245, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 + "score": 0.5410704185827219, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 + "score": 0.24634765861867908, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 + "score": 0.55968513851572, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.26639861901254025, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.5645550913055297, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 + "score": 0.29037747307996287, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 + "score": 0.5917006930610393, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 + "score": 0.2158914621804855, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 + "score": 0.5448184155666022, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.14354007514054795, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.4107294810888034, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.14382854899355546, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.4429424015358985, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 + "score": 0.39152357647177133, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.3628970589000386, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.26380332796984857, + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 + "score": 0.4054234087021839, + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 + "score": 0.667304751638097, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 + "score": 0.0197185083394535, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 + "score": 0.08175340974854195, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 + "score": 0.4308342322390109, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 + "score": 0.3268233487541633, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 + "score": 0.6084114123608597, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.39970543200306696, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.6701057243509293, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.057855095943418054, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.3713083115938122, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.39545121937832856, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.6963801389253689, - "sentence_nr": 6 + "score": 0.04759937639788563, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 + "score": 0.2500653935141143, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 + "score": 0.585528867886047, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.4101479464529936, - "sentence_nr": 6 + "score": 0.21403222128228389, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.7041976254287654, - "sentence_nr": 6 + "score": 0.563121432204311, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.4547900039222725, - "sentence_nr": 6 + "score": 0.18917620656425485, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.6541971428810075, - "sentence_nr": 6 + "score": 0.4346170232980484, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.2919394073770869, - "sentence_nr": 6 + "score": 0.18505378795140082, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.5957961314949175, - "sentence_nr": 6 + "score": 0.47051087423292237, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.17537670874647399, - "sentence_nr": 6 + "score": 0.09807167131529582, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.4800889669735933, - "sentence_nr": 6 + "score": 0.4646043403137081, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.17246578136934215, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.4978075400551749, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.3347189874003768, - "sentence_nr": 6 + "score": 0.12049505059461789, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.6644203374869264, - "sentence_nr": 6 + "score": 0.49305039430054654, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.5088645484558708, - "sentence_nr": 6 + "score": 0.18031307339768174, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.6991726442472661, - "sentence_nr": 6 + "score": 0.522164454804456, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.010790603731135676, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.08933760969291045, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.08321246411701612, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.4202131284704273, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.22845493240080628, - "sentence_nr": 6 + "score": 0.037874984245935134, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.584996891148118, - "sentence_nr": 6 + "score": 0.13083094614009624, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.07811070608237462, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.36587492277430317, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.01048349685717822, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.055565415325616714, + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.5383680940297331, - "sentence_nr": 6 + "score": 0.30865045220428267, + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.786096406361039, - "sentence_nr": 6 + "score": 0.61517480898171, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.5088645484558708, - "sentence_nr": 6 + "score": 0.2846319621273652, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.6991726442472661, - "sentence_nr": 6 + "score": 0.6104886550509491, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.23272696712467975, - "sentence_nr": 6 + "score": 0.20064110494011925, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.5794868721814046, - "sentence_nr": 6 + "score": 0.5205761630334527, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.38785611216800814, - "sentence_nr": 6 + "score": 0.1573857459340795, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.6673259967761724, - "sentence_nr": 6 + "score": 0.5347526444819753, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.17664836757774102, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.5371605646337243, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.060180778682989704, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.29283927549819116, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.16331948281960493, - "sentence_nr": 6 + "score": 0.008180069062416927, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.350650198151987, - "sentence_nr": 6 + "score": 0.04605877529742035, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.8056920633274978, - "sentence_nr": 6 + "score": 0.13904320686250593, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.8391519966182309, - "sentence_nr": 6 + "score": 0.47169365083525167, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.4284945090100314, - "sentence_nr": 6 + "score": 0.4054983797456263, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.7164026439677106, - "sentence_nr": 6 + "score": 0.6264774230839022, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 + "score": 0.420450507904553, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 + "score": 0.6503146347305717, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 + "score": 0.43870712112271204, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 + "score": 0.6525926696001584, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 + "score": 0.4207445490015154, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 + "score": 0.6496192656497308, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.48775137729295837, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.7197897786887384, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 + "score": 0.36781689904382464, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 + "score": 0.6454684777803729, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 + "score": 0.2999092588227898, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 + "score": 0.5505916495384416, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 + "score": 0.24344100611585126, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 + "score": 0.4904649875193978, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 + "score": 0.24344100611585126, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 + "score": 0.4904649875193978, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 + "score": 0.2772655014585435, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 + "score": 0.4799723286048352, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-saba", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 + "score": 0.05694565324984518, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-saba", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 + "score": 0.10703784430638943, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-nemo", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 + "score": 0.1873975519857385, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-nemo", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 + "score": 0.31466719188226244, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 + "score": 0.005449161724399305, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 + "score": 0.026158029267484995, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 + "score": 0.44723347386851464, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 + "score": 0.6623509160762053, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.595092211343687, - "sentence_nr": 6 + "score": 0.2516768028374535, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.7971172820981081, - "sentence_nr": 6 + "score": 0.49572209766846287, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.4831233610237384, - "sentence_nr": 6 + "score": 0.3471036105446511, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.7122562458056777, - "sentence_nr": 6 + "score": 0.5663019495273462, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.4831233610237384, - "sentence_nr": 6 + "score": 0.40468508029421657, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.6187842061799859, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.31229804316214244, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5472538279760955, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.009070964338765818, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.06852404470758497, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.3212066202235163, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5836558214123343, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.20051119758906127, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.5334791309401924, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.24894072982768842, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.5212235893093335, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.23724642034775328, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.7122562458056777, - "sentence_nr": 6 + "score": 0.5175129869169551, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.4207937380724192, - "sentence_nr": 6 + "score": 0.1849419409628554, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.6985308026285912, - "sentence_nr": 6 + "score": 0.5067677916637257, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.31892230912492575, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.5875084575724192, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.42984824697674956, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.7369844404912368, - "sentence_nr": 6 + "score": 0.49804532928450235, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.48994561421713123, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.8020845125558708, - "sentence_nr": 6 + "score": 0.4263684749347053, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.41288994939846857, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.38356258581337616, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.42984824697674956, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.7369844404912368, - "sentence_nr": 6 + "score": 0.4228574070038002, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.36267371851477537, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0567604154331994, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.29827454153543537, + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.595092211343687, - "sentence_nr": 6 + "score": 0.39464291294535175, + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.7945212279546889, - "sentence_nr": 6 + "score": 0.6481030286156323, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.3675667565747676, - "sentence_nr": 6 + "score": 0.18856993172601796, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.5700185304500285, - "sentence_nr": 6 + "score": 0.5742215470122527, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.595092211343687, - "sentence_nr": 6 + "score": 0.11126509848873964, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.7945212279546889, - "sentence_nr": 6 + "score": 0.4338923576538663, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.43011383006801057, - "sentence_nr": 6 + "score": 0.17236491061326006, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.7140577175386648, - "sentence_nr": 6 + "score": 0.5548663878579595, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.2383229027835547, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.5742431738972974, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3797999858273739, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -28641,8607 +33294,9999 @@ "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 6 + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.3843363395779093, - "sentence_nr": 6 + "score": 0.3540506408782035, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.42984824697674956, - "sentence_nr": 6 + "score": 0.08906092883748383, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.7369844404912368, - "sentence_nr": 6 + "score": 0.4317746285352776, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 + "score": 0.3535002370419364, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 + "score": 0.5959879218348465, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.32263864160302524, - "sentence_nr": 6 + "score": 0.393613605227227, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.6824395076981005, - "sentence_nr": 6 + "score": 0.6492198447661237, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.4896430866960958, - "sentence_nr": 6 + "score": 0.393613605227227, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.7719180936906627, - "sentence_nr": 6 + "score": 0.6492198447661237, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.32365795029773287, - "sentence_nr": 6 + "score": 0.38333108639273095, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.6590438071804039, - "sentence_nr": 6 + "score": 0.6252821653079126, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.3938668169322973, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.6503960273855587, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 + "score": 0.30783677787322206, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 + "score": 0.6417108947268295, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 + "score": 0.2562849004088193, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 + "score": 0.5767019342009202, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.1708279669132677, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.4638787181207799, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 + "score": 0.19851743023355672, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 + "score": 0.49793621556542356, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.11749734515861608, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.3800781530302128, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.16925466459550803, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.32423541824465607, + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 + "score": 0.44464935391849836, + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 + "score": 0.696223947326307, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 + "score": 0.393613605227227, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 + "score": 0.645368276087673, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.32263864160302524, - "sentence_nr": 6 + "score": 0.10954782904363085, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.6824395076981005, - "sentence_nr": 6 + "score": 0.5090382887002297, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 + "score": 0.2834484329788497, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 + "score": 0.5201572704778937, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.33485170416778803, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5864709039906052, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.25109549502043527, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5187154028710355, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.4481489512240194, - "sentence_nr": 6 + "score": 0.006569332862878646, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.7745649676018984, - "sentence_nr": 6 + "score": 0.060864196135666904, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 + "score": 0.2756885721075884, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 + "score": 0.5867077870431389, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.5383680940297331, - "sentence_nr": 6 + "score": 0.35983766090218355, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.786096406361039, - "sentence_nr": 6 + "score": 0.5862251404739759, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.38305978177479755, - "sentence_nr": 6 + "score": 0.21147734744561483, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.6061131723054572, - "sentence_nr": 6 + "score": 0.41020178654369294, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.34636800712900173, - "sentence_nr": 6 + "score": 0.3563982585943877, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.5167955767158704, - "sentence_nr": 6 + "score": 0.5378970484635915, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.3675667565747676, - "sentence_nr": 6 + "score": 0.1510722413165652, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.5397693417183738, - "sentence_nr": 6 + "score": 0.43592329727028295, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.32762442529164815, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.5817308767051309, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.421151249507493, - "sentence_nr": 6 + "score": 0.2162882016096312, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.6889522290200047, - "sentence_nr": 6 + "score": 0.5347607537943324, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.5383680940297331, - "sentence_nr": 6 + "score": 0.2465888500427759, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.786096406361039, - "sentence_nr": 6 + "score": 0.5221084445696768, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.19375900698784013, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.4417223478092842, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.19375900698784013, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.4417223478092842, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.425143650778693, - "sentence_nr": 6 + "score": 0.13240628161243978, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.6674242019044293, - "sentence_nr": 6 + "score": 0.3347576434758551, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.14990549297172232, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3679304885604264, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.15955011685514067, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.30360440229073266, + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.5383680940297331, - "sentence_nr": 6 + "score": 0.37081839104772296, + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.786096406361039, - "sentence_nr": 6 + "score": 0.6563960892873841, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.421151249507493, - "sentence_nr": 6 + "score": 0.43674417649448877, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.6938674571170766, - "sentence_nr": 6 + "score": 0.6420436201302845, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 6 + "score": 0.31372333533981844, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 6 + "score": 0.5741396495481692, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.5383680940297331, - "sentence_nr": 6 + "score": 0.4583603882613907, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.786096406361039, - "sentence_nr": 6 + "score": 0.671355324267905, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.4577065720375266, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.6651583454016877, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.09074542617661965, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.34743483318854457, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.41843795218458035, - "sentence_nr": 6 + "score": 0.10077062063331403, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.6316283876832989, - "sentence_nr": 6 + "score": 0.09760482860544632, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.4803501444747088, - "sentence_nr": 6 + "score": 0.2600884210903425, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.7417101158248365, - "sentence_nr": 6 + "score": 0.531430106996609, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.42221847853238736, - "sentence_nr": 6 + "score": 0.2971752224486841, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.6656008733100179, - "sentence_nr": 6 + "score": 0.605133664481872, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.24047860794644352, - "sentence_nr": 6 + "score": 0.2329856851831642, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.58198979036704, - "sentence_nr": 6 + "score": 0.5405751250637106, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.24047860794644352, - "sentence_nr": 6 + "score": 0.2563564295134795, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.58198979036704, - "sentence_nr": 6 + "score": 0.5499025328773104, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.24047860794644352, - "sentence_nr": 6 + "score": 0.2500653935141143, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.58198979036704, - "sentence_nr": 6 + "score": 0.5098952451698188, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.3410035628829697, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.6673519558586546, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.24047860794644352, - "sentence_nr": 6 + "score": 0.2537710754125116, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.58198979036704, - "sentence_nr": 6 + "score": 0.5898314098579354, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.24047860794644352, - "sentence_nr": 6 + "score": 0.22150370805587954, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.58198979036704, - "sentence_nr": 6 + "score": 0.5463488388082953, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.18220310573875692, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.4903627893118491, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.18220310573875692, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.4903627893118491, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.24047860794644352, - "sentence_nr": 6 + "score": 0.14574402656519908, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.58198979036704, - "sentence_nr": 6 + "score": 0.4275543759804943, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.14917974712716253, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.3881363329343739, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.13469741495510942, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.23441276321629778, + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.24047860794644352, - "sentence_nr": 6 + "score": 0.3319944964021059, + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.58198979036704, - "sentence_nr": 6 + "score": 0.6676204564640195, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.24047860794644352, - "sentence_nr": 6 + "score": 0.3410035628829697, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.58198979036704, - "sentence_nr": 6 + "score": 0.6673519558586546, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.24047860794644352, - "sentence_nr": 6 + "score": 0.24750028117795922, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.58198979036704, - "sentence_nr": 6 + "score": 0.5894646098566614, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.24047860794644352, - "sentence_nr": 6 + "score": 0.2687379663485886, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.58198979036704, - "sentence_nr": 6 + "score": 0.5826619907747026, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.33197363357608073, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.6622924173503438, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.1549337617358287, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.46129538624370314, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.24047860794644352, - "sentence_nr": 6 + "score": 0.15897333608001968, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.58198979036704, - "sentence_nr": 6 + "score": 0.4496168003395693, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.24047860794644352, - "sentence_nr": 6 + "score": 0.15089318423122544, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.58198979036704, - "sentence_nr": 6 + "score": 0.4042478943311393, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.4481489512240194, - "sentence_nr": 6 + "score": 0.2786312783602775, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.7994721822064033, - "sentence_nr": 6 + "score": 0.4836796407825139, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.41756686236967944, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.5616829345739638, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.4205004825822372, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.5674537639314233, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.38754077501151757, - "sentence_nr": 6 + "score": 0.40974323819644953, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.598503332887995, - "sentence_nr": 6 + "score": 0.5368112087257564, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.3942058093215873, - "sentence_nr": 6 + "score": 0.3802176949775883, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.6619197609807324, - "sentence_nr": 6 + "score": 0.5452668934611525, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.5124776602965491, - "sentence_nr": 6 + "score": 0.31664648301486426, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.7722874800637285, - "sentence_nr": 6 + "score": 0.5144731747994161, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.3291598889023262, - "sentence_nr": 6 + "score": 0.39461811323775403, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.6085546680624175, - "sentence_nr": 6 + "score": 0.5655204109921267, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.6173766800527999, - "sentence_nr": 6 + "score": 0.2460808118695353, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.857390040146912, - "sentence_nr": 6 + "score": 0.423121369921965, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.6173766800527999, - "sentence_nr": 6 + "score": 0.24527664931927695, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.857390040146912, - "sentence_nr": 6 + "score": 0.4209094498428288, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 6 + "score": 0.219672574669477, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 6 + "score": 0.37413906702142435, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-saba", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.6173766800527999, - "sentence_nr": 6 + "score": 0.23772058435392884, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-saba", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.857390040146912, - "sentence_nr": 6 + "score": 0.41428526424536166, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "mistralai/mistral-nemo", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 + "score": 0.18227211511988975, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "mistralai/mistral-nemo", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.3470839302425112, - "sentence_nr": 6 + "score": 0.3382801865692036, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.4845766087853281, - "sentence_nr": 6 + "score": 0.005449161724399305, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.7138566289355139, - "sentence_nr": 6 + "score": 0.026158029267484995, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 + "score": 0.4624783675017329, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.5512324461754572, - "sentence_nr": 6 + "score": 0.6143805843064278, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.42984824697674956, - "sentence_nr": 6 + "score": 0.3116520879159789, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.7289444696770301, - "sentence_nr": 6 + "score": 0.47517792402030584, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.3737098172408067, - "sentence_nr": 6 + "score": 0.3627923367798331, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.6832201170000932, - "sentence_nr": 6 + "score": 0.5255399246733422, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 + "score": 0.3425544777717761, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.5582775802710993, - "sentence_nr": 6 + "score": 0.50704305966035, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "id", + "model": "microsoft/phi-4", + "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 + "score": 0.2209272285481984, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "id", + "model": "microsoft/phi-4", + "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.5582775802710993, - "sentence_nr": 6 + "score": 0.39594412177016736, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.12870376210497989, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.22151451171035633, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.30181468526956173, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.5261802780475523, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.39579112101105834, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.6431490866428237, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.38189567401226293, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.6154314825900052, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.3436153961225413, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.5954254642696512, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.4339219137216798, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.6338401824373191, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.4867425155088891, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.694902422243332, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.3842999367495742, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.5816504483384909, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 + "score": 0.25564177137418986, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.5582775802710993, - "sentence_nr": 6 + "score": 0.49870011615602194, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.25827391508911896, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.4946279037884096, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.25827391508911896, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.4946279037884096, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 + "score": 0.27946415227589155, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.5582775802710993, - "sentence_nr": 6 + "score": 0.4938296655037709, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.23770995610324924, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.4452519937393593, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.28833961053297996, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.45958301505159854, + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.22436571657855092, - "sentence_nr": 6 + "score": 0.546590882357469, + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.61166969974579, - "sentence_nr": 6 + "score": 0.7387035187469114, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.22436571657855092, - "sentence_nr": 6 + "score": 0.42605707394192827, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.61166969974579, - "sentence_nr": 6 + "score": 0.6273945682408745, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 + "score": 0.26925601229087914, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.5582775802710993, - "sentence_nr": 6 + "score": 0.6050395148484196, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 + "score": 0.43235877156651625, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.5582775802710993, - "sentence_nr": 6 + "score": 0.614485867381761, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.5140564859600669, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.6829131724977509, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.30179316261741523, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.5734438794851154, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.22423870508323301, - "sentence_nr": 6 + "score": 0.012951112459987979, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.6366515193698862, - "sentence_nr": 6 + "score": 0.11882277038397698, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 + "score": 0.3272963527043486, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.5582775802710993, - "sentence_nr": 6 + "score": 0.5715613564297359, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.6507561416639396, - "sentence_nr": 6 + "score": 0.35253338922743144, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.8215788698315908, - "sentence_nr": 6 + "score": 0.6487975154557831, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.4881010344921759, - "sentence_nr": 6 + "score": 0.2126707920684064, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.7317734491561229, - "sentence_nr": 6 + "score": 0.4659908460634765, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.4881010344921759, - "sentence_nr": 6 + "score": 0.2914880531303981, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.7317734491561229, - "sentence_nr": 6 + "score": 0.5962886968213414, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.6507561416639396, - "sentence_nr": 6 + "score": 0.23944666570758283, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.8215788698315908, - "sentence_nr": 6 + "score": 0.5106509239874657, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.3157230363707331, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.6026946145401261, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.5967384019266717, - "sentence_nr": 6 + "score": 0.2258043389079604, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.8544348080833218, - "sentence_nr": 6 + "score": 0.528195718512124, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.4881010344921759, - "sentence_nr": 6 + "score": 0.15985840708020788, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.7317734491561229, - "sentence_nr": 6 + "score": 0.44951053332729884, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.1549337617358287, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.4181663627917128, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.1549337617358287, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.4181663627917128, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.4881010344921759, - "sentence_nr": 6 + "score": 0.17920531400657588, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.7317734491561229, - "sentence_nr": 6 + "score": 0.4522763055702811, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.09150321656179629, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.37820626664913604, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.1604629854143619, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.3770931135093792, + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.4881010344921759, - "sentence_nr": 6 + "score": 0.005449161724399305, + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.7317734491561229, - "sentence_nr": 6 + "score": 0.026158029267484995, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.005449161724399305, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.026158029267484995, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.4881010344921759, - "sentence_nr": 6 + "score": 0.26199400535088346, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.7317734491561229, - "sentence_nr": 6 + "score": 0.5804827870380099, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.4881010344921759, - "sentence_nr": 6 + "score": 0.20533250289138671, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.7317734491561229, - "sentence_nr": 6 + "score": 0.49681810344665644, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.28467215304840787, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.5192330975787584, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.1672112091212215, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.5078446328931894, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.251696695878184, - "sentence_nr": 6 + "score": 0.0950330051810703, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.6180491939580447, - "sentence_nr": 6 + "score": 0.2535554509913635, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.5967384019266717, - "sentence_nr": 6 + "score": 0.20378172261136207, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.8544348080833218, - "sentence_nr": 6 + "score": 0.48600008237332104, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.2719326877457978, - "sentence_nr": 6 + "score": 0.24513414885202045, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.6002086362682414, - "sentence_nr": 6 + "score": 0.5476647609559218, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 + "score": 0.23240102389974368, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 + "score": 0.4973274282641141, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.4284945090100314, - "sentence_nr": 6 + "score": 0.2516768028374535, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.7246227738353674, - "sentence_nr": 6 + "score": 0.47249781871556595, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.29170205300854224, - "sentence_nr": 6 + "score": 0.16105265992626083, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.6498499527552988, - "sentence_nr": 6 + "score": 0.404377371664668, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.2920934313715234, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.5778663410753718, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.29170205300854224, - "sentence_nr": 6 + "score": 0.2573392925035755, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.6498499527552988, - "sentence_nr": 6 + "score": 0.5328565784271402, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.29170205300854224, - "sentence_nr": 6 + "score": 0.27217589854489177, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.6498499527552988, - "sentence_nr": 6 + "score": 0.5756343666825848, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.31671615012203974, - "sentence_nr": 6 + "score": 0.12192550254402314, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.6782734900436637, - "sentence_nr": 6 + "score": 0.3548931840142405, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo", "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.4284945090100314, - "sentence_nr": 6 + "score": 0.12192550254402314, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.7246227738353674, - "sentence_nr": 6 + "score": 0.3548931840142405, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.28592291256793106, - "sentence_nr": 6 + "score": 0.15813859795767055, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.6102727682426059, - "sentence_nr": 6 + "score": 0.44607340294350173, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-saba", "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.2774290545068997, - "sentence_nr": 6 + "score": 0.12772369606112696, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-saba", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.6397454944654261, - "sentence_nr": 6 + "score": 0.38771162055402797, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-nemo", "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.31671615012203974, - "sentence_nr": 6 + "score": 0.009832372776986684, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-nemo", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.6782734900436637, - "sentence_nr": 6 + "score": 0.06995003895819606, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.2748202507307579, - "sentence_nr": 6 + "score": 0.005449161724399305, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.5810363959809548, - "sentence_nr": 6 + "score": 0.026158029267484995, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.28571962561926445, - "sentence_nr": 6 + "score": 0.005454501428722432, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.693456244639743, - "sentence_nr": 6 + "score": 0.02525205404411707, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.17905278399134197, - "sentence_nr": 7 + "score": 0.20533250289138671, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.37257295447029826, - "sentence_nr": 7 + "score": 0.45975635079501215, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.15521606028436608, - "sentence_nr": 7 + "score": 0.3077422016953529, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.37645329404497957, - "sentence_nr": 7 + "score": 0.5943673820353285, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.12620429887108936, - "sentence_nr": 7 + "score": 0.3077422016953529, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.35580703793872603, - "sentence_nr": 7 + "score": 0.5943673820353285, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "model": "microsoft/phi-4", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.12872220631084524, - "sentence_nr": 7 + "score": 0.10081843650735454, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "model": "microsoft/phi-4", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.33602633953270183, - "sentence_nr": 7 + "score": 0.38165410078357226, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "en", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.10316589800033629, - "sentence_nr": 7 + "score": 0.09564571510780719, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "en", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.3117209570799365, - "sentence_nr": 7 + "score": 0.1907009110214351, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.1582866049832572, - "sentence_nr": 7 + "score": 0.10353153556093725, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.34487142413575794, - "sentence_nr": 7 + "score": 0.40215410362634535, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 + "score": 0.28653528640783255, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.03037224815656603, - "sentence_nr": 7 + "score": 0.5053636612097852, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.306354798881405, - "sentence_nr": 7 + "score": 0.17979384730979156, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.5275640293284548, - "sentence_nr": 7 + "score": 0.4177311931467539, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.10203846572325131, - "sentence_nr": 7 + "score": 0.2767429728676341, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.33381153680096753, - "sentence_nr": 7 + "score": 0.51183750216717, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 0.3192837057100497, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "score": 0.4886865884781344, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.1685643537060726, - "sentence_nr": 7 + "score": 0.3800528767347385, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.36926449644166065, - "sentence_nr": 7 + "score": 0.5075280145138238, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.014935758919429663, - "sentence_nr": 7 + "score": 0.3020162743522857, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.08106107745254391, - "sentence_nr": 7 + "score": 0.5582696703142395, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.044304867337633724, - "sentence_nr": 7 + "score": 0.28467215304840787, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.20806974344498103, - "sentence_nr": 7 + "score": 0.4298052820106505, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.1418524086391329, - "sentence_nr": 7 + "score": 0.21147734744561483, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.38295770773758747, - "sentence_nr": 7 + "score": 0.39365062043136434, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.15268019045355535, - "sentence_nr": 7 + "score": 0.21147734744561483, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.41028757620299977, - "sentence_nr": 7 + "score": 0.39365062043136434, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.030860166165309233, - "sentence_nr": 7 + "score": 0.19358934025667454, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.1100250143829584, - "sentence_nr": 7 + "score": 0.3468163065453778, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "mistralai/mistral-saba", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.21255327712152144, - "sentence_nr": 7 + "score": 0.183668071302564, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "mistralai/mistral-saba", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.43272151570555034, - "sentence_nr": 7 + "score": 0.35345479637314337, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", + "model": "mistralai/mistral-nemo", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.09632940954518097, - "sentence_nr": 7 + "score": 0.1286057341226582, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", + "model": "mistralai/mistral-nemo", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.3503787449131298, - "sentence_nr": 7 + "score": 0.2808359248223786, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.08860973467526746, - "sentence_nr": 7 + "score": 0.3800528767347385, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.3178004360288637, - "sentence_nr": 7 + "score": 0.6363479287351667, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 + "score": 0.3295957765387521, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.05918530850500025, - "sentence_nr": 7 + "score": 0.625928739543479, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.22815217447604735, - "sentence_nr": 7 + "score": 0.24328450115124742, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.48973384754323573, - "sentence_nr": 7 + "score": 0.42756232255111404, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.1438459189500836, - "sentence_nr": 7 + "score": 0.383259260976792, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.30693371625402605, - "sentence_nr": 7 + "score": 0.5920204217586964, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.0979038733644086, - "sentence_nr": 7 + "score": 0.383259260976792, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.30211704738953993, - "sentence_nr": 7 + "score": 0.585064022155838, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "model": "microsoft/phi-4", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.2288990188897003, - "sentence_nr": 7 + "score": 0.10599453139020258, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "model": "microsoft/phi-4", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.48933901443699584, - "sentence_nr": 7 + "score": 0.33428722292794555, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.009624974244068071, - "sentence_nr": 7 + "score": 0.19180992590551618, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.07318255686027669, - "sentence_nr": 7 + "score": 0.3308002143045663, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "te", "task": "translation_from", "metric": "bleu", - "score": 0.043420474648595074, - "sentence_nr": 7 + "score": 0.22305706065076847, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "te", "task": "translation_from", "metric": "chrf", - "score": 0.2884095690753619, - "sentence_nr": 7 + "score": 0.5202782711775973, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.17382347640129553, - "sentence_nr": 7 + "score": 0.31326946419057006, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.4061580777885601, - "sentence_nr": 7 + "score": 0.591171976889058, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.13868172938464635, - "sentence_nr": 7 + "score": 0.1702602472176709, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.3094469764260441, - "sentence_nr": 7 + "score": 0.4366640707779677, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.10361854845420869, - "sentence_nr": 7 + "score": 0.2615659486493292, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.32774802711076473, - "sentence_nr": 7 + "score": 0.5126931977939474, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.15186969315425305, - "sentence_nr": 7 + "score": 0.1840585956032067, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.3458120002305796, - "sentence_nr": 7 + "score": 0.5282240694221736, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.05109780779294313, - "sentence_nr": 7 + "score": 0.243584033816485, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.27453721503960304, - "sentence_nr": 7 + "score": 0.5152157451762782, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.19074380068002203, - "sentence_nr": 7 + "score": 0.30679661188970503, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.40566585096277824, - "sentence_nr": 7 + "score": 0.5967827151961556, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 + "score": 0.15426765225005337, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.20031726728306523, - "sentence_nr": 7 + "score": 0.45289625960131974, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.28020858443704566, - "sentence_nr": 7 + "score": 0.16251572062938138, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.4608753441128863, - "sentence_nr": 7 + "score": 0.40123444651304624, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.20485833586704885, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.468735805943922, - "sentence_nr": 7 + "score": 0.22908476944452194, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation_from", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 0.004770195810675918, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "score": 0.03746566558042944, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "model": "mistralai/mistral-saba", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.192481383169461, - "sentence_nr": 7 + "score": 0.1580553860161723, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "model": "mistralai/mistral-saba", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.3799051443349615, - "sentence_nr": 7 + "score": 0.41159403597129257, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 7 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.01252735726099625, - "sentence_nr": 7 + "score": 0.20675563318176166, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 + "score": 0.3349039841689432, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.273148644463442, - "sentence_nr": 7 + "score": 0.6225752009608275, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.09431297723472011, - "sentence_nr": 7 + "score": 0.3057322895905473, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.3616856339096348, - "sentence_nr": 7 + "score": 0.5821796659763352, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.11091252683001185, - "sentence_nr": 7 + "score": 0.23240102389974368, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.26607634610445896, - "sentence_nr": 7 + "score": 0.5335795368341105, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 + "score": 0.31487248334376844, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.189717083187238, - "sentence_nr": 7 + "score": 0.5905881236136059, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.10266747466754884, - "sentence_nr": 7 + "score": 0.35994987563597164, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.3364703638684802, - "sentence_nr": 7 + "score": 0.6548318751798113, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "es", + "model": "microsoft/phi-4", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.14974627745213473, - "sentence_nr": 7 + "score": 0.158045000461378, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "es", + "model": "microsoft/phi-4", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.3288440080133683, - "sentence_nr": 7 + "score": 0.40165223613435413, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.22381487678101888, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.5249370100068887, - "sentence_nr": 7 + "score": 0.03509992242758199, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 + "score": 0.30204473943342836, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", "task": "translation_from", "metric": "chrf", - "score": 0.28912109037408523, - "sentence_nr": 7 + "score": 0.5394635390078353, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.22274170049761707, - "sentence_nr": 7 + "score": 0.5294442646627652, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.4532692581443855, - "sentence_nr": 7 + "score": 0.7281375072835307, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.2144604484498437, - "sentence_nr": 7 + "score": 0.933651069586263, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.48894052224175993, - "sentence_nr": 7 + "score": 0.9586507529693243, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 0.933651069586263, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "score": 0.9586507529693243, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.21001173689943997, - "sentence_nr": 7 + "score": 0.6337520241233826, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.423493931076046, - "sentence_nr": 7 + "score": 0.7734740773636255, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.006232910970143225, - "sentence_nr": 7 + "score": 0.7096224667917136, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.06317168666869727, - "sentence_nr": 7 + "score": 0.8862932371217843, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.06938388878349923, - "sentence_nr": 7 + "score": 0.7409995286953545, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.3541078046399395, - "sentence_nr": 7 + "score": 0.9017022389242945, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.14557808399334188, - "sentence_nr": 7 + "score": 0.7096224667917136, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.36598346755702993, - "sentence_nr": 7 + "score": 0.8862932371217843, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.18154954789336694, - "sentence_nr": 7 + "score": 1.0, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.4557483776072868, - "sentence_nr": 7 + "score": 1.0, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.06897533888461813, - "sentence_nr": 7 + "score": 1.0, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.2776666563000344, - "sentence_nr": 7 + "score": 1.0, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.1665765483402476, - "sentence_nr": 7 + "score": 0.882190724997149, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.4017968725013381, - "sentence_nr": 7 + "score": 0.943123392401343, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", + "model": "mistralai/mistral-saba", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.13122945980689196, - "sentence_nr": 7 + "score": 0.39793093873955576, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", + "model": "mistralai/mistral-saba", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.3219448107305951, - "sentence_nr": 7 + "score": 0.6648788692343665, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.10666682719585797, - "sentence_nr": 7 + "score": 1.0, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.33462901494141756, - "sentence_nr": 7 + "score": 1.0, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 + "score": 1.0, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.04151505758906764, - "sentence_nr": 7 + "score": 1.0, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.257476399230248, - "sentence_nr": 7 + "score": 0.7147882714185101, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.4616305811028715, - "sentence_nr": 7 + "score": 0.8408773556139596, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.12189363728567917, - "sentence_nr": 7 + "score": 0.3359230828063256, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.37595660827287636, - "sentence_nr": 7 + "score": 0.573086119969458, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 0.7106361351765512, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "score": 0.8390104107504974, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.18926971577178767, - "sentence_nr": 7 + "score": 0.7770554539970614, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.4931453714148122, - "sentence_nr": 7 + "score": 0.9260678176630538, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.8767740197085658, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.9055624233154097, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.004663531624960091, - "sentence_nr": 7 + "score": 0.8627586293513119, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.07262533604330305, - "sentence_nr": 7 + "score": 0.8964369716535558, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", + "bcp_47": "jv", "task": "translation_from", "metric": "bleu", - "score": 0.037401300306846526, - "sentence_nr": 7 + "score": 0.9082489095559809, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", + "bcp_47": "jv", "task": "translation_from", "metric": "chrf", - "score": 0.27395881217705964, - "sentence_nr": 7 + "score": 0.9677853954871374, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.16780109158842918, - "sentence_nr": 7 + "score": 0.4162915990459618, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.3968694014697679, - "sentence_nr": 7 + "score": 0.5970097205621886, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.1381751568911733, - "sentence_nr": 7 + "score": 0.3816408219023713, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.3121557499162649, - "sentence_nr": 7 + "score": 0.5784105768028126, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.1579497466001673, - "sentence_nr": 7 + "score": 0.39234342738825634, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.5092928545844059, - "sentence_nr": 7 + "score": 0.5972186089812455, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.16286876096900815, - "sentence_nr": 7 + "score": 0.361250819353898, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.3422914837190449, - "sentence_nr": 7 + "score": 0.6105988260114965, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.46317041049597935, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.6800890174642056, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.0985948810698831, - "sentence_nr": 7 + "score": 0.4067505079817917, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.2593923911428194, - "sentence_nr": 7 + "score": 0.629072095000949, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.20198948917565754, - "sentence_nr": 7 + "score": 0.33713757310040376, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.34858221035657466, - "sentence_nr": 7 + "score": 0.5731908178757754, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.06888992790640074, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.2874483621307283, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.28562106983744195, - "sentence_nr": 7 + "score": 0.24931126684773341, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.4749518446683913, - "sentence_nr": 7 + "score": 0.44539000137249984, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.1631196072688366, - "sentence_nr": 7 + "score": 0.20134984470993175, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.3502730667074754, - "sentence_nr": 7 + "score": 0.47593263188822477, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", + "model": "mistralai/mistral-saba", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 0.011439099361283091, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", + "model": "mistralai/mistral-saba", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "score": 0.07109011153265941, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.21286836557101563, - "sentence_nr": 7 + "score": 0.20786721468392394, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.45055232014427626, - "sentence_nr": 7 + "score": 0.2617811877857886, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.031126201157905466, - "sentence_nr": 7 + "score": 0.4840047962912239, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.015970144454664378, - "sentence_nr": 7 + "score": 0.706836097361849, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.06929847827527827, - "sentence_nr": 7 + "score": 0.005449161724399305, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.30185194035792856, - "sentence_nr": 7 + "score": 0.026158029267484995, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.11697642623186386, - "sentence_nr": 7 + "score": 0.18236198178601878, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.37117753637984835, - "sentence_nr": 7 + "score": 0.4546828651423093, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.20065115069964384, - "sentence_nr": 7 + "score": 0.4710304492059704, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.4084885616013531, - "sentence_nr": 7 + "score": 0.6951644805792349, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 + "score": 0.5068325976422298, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.17621963873521423, - "sentence_nr": 7 + "score": 0.7027078355532212, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "model": "microsoft/phi-4", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.09916146090364127, - "sentence_nr": 7 + "score": 0.24793827875738764, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "model": "microsoft/phi-4", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.3121110160693956, - "sentence_nr": 7 + "score": 0.4560817652765598, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.05658523178050362, - "sentence_nr": 7 + "score": 0.20786721468392394, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.27543168466680934, - "sentence_nr": 7 + "score": 0.22772581789894308, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", "task": "translation_from", "metric": "bleu", - "score": 0.22669486951066523, - "sentence_nr": 7 + "score": 0.36463597249757107, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", "task": "translation_from", "metric": "chrf", - "score": 0.4484451941575473, - "sentence_nr": 7 + "score": 0.5937597266341832, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 + "score": 0.25848476545940924, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.011560595536104562, - "sentence_nr": 7 + "score": 0.4897308313348651, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.28619616266290565, - "sentence_nr": 7 + "score": 0.18398226639192106, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.509863149159976, - "sentence_nr": 7 + "score": 0.37285010531146734, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.07368089078790738, - "sentence_nr": 7 + "score": 0.18116830735735984, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.41452613113710224, - "sentence_nr": 7 + "score": 0.378028436956142, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 0.2539169853234758, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "score": 0.4294871148542432, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.17892846390928677, - "sentence_nr": 7 + "score": 0.4041338117274596, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.47088195615067674, - "sentence_nr": 7 + "score": 0.5261706483072474, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.012370537823050053, - "sentence_nr": 7 + "score": 0.2571503337178629, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.06660321132654005, - "sentence_nr": 7 + "score": 0.437073296721152, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.09831093939330879, - "sentence_nr": 7 + "score": 0.19129143021561437, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.33203866499974327, - "sentence_nr": 7 + "score": 0.390473445537339, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.13805615693046389, - "sentence_nr": 7 + "score": 0.11470856515430629, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.40787998733941394, - "sentence_nr": 7 + "score": 0.31103797207623773, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.12291219097556666, - "sentence_nr": 7 + "score": 0.11470856515430629, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.3448002180666873, - "sentence_nr": 7 + "score": 0.31103797207623773, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.17643078314788999, - "sentence_nr": 7 + "score": 0.12731505388718733, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.40757584786696294, - "sentence_nr": 7 + "score": 0.3354785767663773, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-saba", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.1989414239237112, - "sentence_nr": 7 + "score": 0.1165747879955896, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-saba", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.3791567776918788, - "sentence_nr": 7 + "score": 0.29596615473767834, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.111871161288306, - "sentence_nr": 7 + "score": 0.2055434718326777, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.30886933073849715, - "sentence_nr": 7 + "score": 0.4145758427516506, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.09916009482330297, - "sentence_nr": 7 + "score": 0.005449161724399305, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.3032928217006101, - "sentence_nr": 7 + "score": 0.026158029267484995, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 + "score": 0.005449161724399305, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.09453698369211004, - "sentence_nr": 7 + "score": 0.026158029267484995, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.266802610525441, - "sentence_nr": 7 + "score": 0.2541277777982055, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.4519949729282707, - "sentence_nr": 7 + "score": 0.4083801144711196, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.1995980198896431, - "sentence_nr": 7 + "score": 0.24814514148154546, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.4244503391142409, - "sentence_nr": 7 + "score": 0.5127275550938416, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 0.2941675978352054, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "score": 0.5720229382661602, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "model": "microsoft/phi-4", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.18216362398065106, - "sentence_nr": 7 + "score": 0.1361285171343236, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "model": "microsoft/phi-4", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.36524832602306334, - "sentence_nr": 7 + "score": 0.2916549694940723, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 + "score": 0.0440854328121759, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.08319287955437346, - "sentence_nr": 7 + "score": 0.053628247089794495, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", + "bcp_47": "ta", "task": "translation_from", "metric": "bleu", - "score": 0.08383676689911676, - "sentence_nr": 7 + "score": 0.2500984051960647, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", + "bcp_47": "ta", "task": "translation_from", "metric": "chrf", - "score": 0.2855329690010324, - "sentence_nr": 7 + "score": 0.510079353939284, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.19148282873929853, - "sentence_nr": 7 + "score": 0.29851690541541476, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.4707949702068854, - "sentence_nr": 7 + "score": 0.6224209860013706, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.20608572305725564, - "sentence_nr": 7 + "score": 0.26958884543190903, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.4704943905570542, - "sentence_nr": 7 + "score": 0.5631664732610485, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.08183353655679478, - "sentence_nr": 7 + "score": 0.2503955135641583, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.25007633393249695, - "sentence_nr": 7 + "score": 0.5411247834284307, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.0981642545874085, - "sentence_nr": 7 + "score": 0.26115021337737276, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.31793222329793575, - "sentence_nr": 7 + "score": 0.5520240492306279, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.27837899330129656, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.5673065268668616, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.008104773738922768, - "sentence_nr": 7 + "score": 0.37856950851858595, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.07651300648420487, - "sentence_nr": 7 + "score": 0.6309392505592764, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.09478705591775652, - "sentence_nr": 7 + "score": 0.27057949011516347, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.33293232395887284, - "sentence_nr": 7 + "score": 0.5644281635271426, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.11976209355757551, - "sentence_nr": 7 + "score": 0.14361580529268292, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.27004759126600675, - "sentence_nr": 7 + "score": 0.38179088353033064, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.18740401341623053, - "sentence_nr": 7 + "score": 0.14361580529268292, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.47595080320432986, - "sentence_nr": 7 + "score": 0.38179088353033064, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.09142555538569784, - "sentence_nr": 7 + "score": 0.1676495122493199, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.31371707771405133, - "sentence_nr": 7 + "score": 0.36959105080753546, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", + "model": "mistralai/mistral-saba", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.05438497632520132, - "sentence_nr": 7 + "score": 0.2297132059983132, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", + "model": "mistralai/mistral-saba", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.26123506271154656, - "sentence_nr": 7 + "score": 0.4379547539491647, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.1490232164900303, - "sentence_nr": 7 + "score": 0.1839576031944879, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.43745835724045856, - "sentence_nr": 7 + "score": 0.2651734406129379, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.005606294971348417, - "sentence_nr": 7 + "score": 0.37416241283362434, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.06662245090541388, - "sentence_nr": 7 + "score": 0.6754691163599457, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 + "score": 0.3556383209223338, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.12752236829255797, - "sentence_nr": 7 + "score": 0.6120361598575286, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.12157241570357182, - "sentence_nr": 7 + "score": 0.19300403619224038, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.4080990097991491, - "sentence_nr": 7 + "score": 0.572822450824776, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.085416483900781, - "sentence_nr": 7 + "score": 0.2799103318440567, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.2825804066750608, - "sentence_nr": 7 + "score": 0.6172633503183605, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.10415298161056984, - "sentence_nr": 7 + "score": 0.3888403695409309, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.33452632923050557, - "sentence_nr": 7 + "score": 0.6733543865794972, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "model": "microsoft/phi-4", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.21204239268527586, - "sentence_nr": 7 + "score": 0.03223351941652736, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "model": "microsoft/phi-4", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.3846197304420823, - "sentence_nr": 7 + "score": 0.2249430587095686, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.10793175369072293, - "sentence_nr": 7 + "score": 0.15492402556203205, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.29660617805108785, - "sentence_nr": 7 + "score": 0.3125746694462918, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", "task": "translation_from", "metric": "bleu", - "score": 0.14057105892389254, - "sentence_nr": 7 + "score": 0.23233851806966574, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", "task": "translation_from", "metric": "chrf", - "score": 0.3028381427383384, - "sentence_nr": 7 + "score": 0.5357993047462365, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.004763623056487517, - "sentence_nr": 7 + "score": 0.29796912700911177, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.07485928007606017, - "sentence_nr": 7 + "score": 0.5158892363484622, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.26242764207671093, - "sentence_nr": 7 + "score": 0.4005296397635166, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.48831610046795776, - "sentence_nr": 7 + "score": 0.6201785376974677, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.11689600237805012, - "sentence_nr": 7 + "score": 0.4005296397635166, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.38258301195690664, - "sentence_nr": 7 + "score": 0.591086403119955, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 0.3114493863658917, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "score": 0.5682352098535879, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.19809535837880818, - "sentence_nr": 7 + "score": 0.27824533355692294, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.447539350421338, - "sentence_nr": 7 + "score": 0.5540161525695098, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.004718557257042585, - "sentence_nr": 7 + "score": 0.21815383167015925, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.08019304349523304, - "sentence_nr": 7 + "score": 0.46829275965930944, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.05614653993259943, - "sentence_nr": 7 + "score": 0.24285172240675165, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.26485323792360876, - "sentence_nr": 7 + "score": 0.4655392375590772, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.16431887969160053, - "sentence_nr": 7 + "score": 0.24527664931927695, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.4088971379214799, - "sentence_nr": 7 + "score": 0.4846287820466136, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.11452508920842025, - "sentence_nr": 7 + "score": 0.24527664931927695, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.3212742401272785, - "sentence_nr": 7 + "score": 0.4846287820466136, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.08218359452575877, - "sentence_nr": 7 + "score": 0.008072417039197614, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.23905391762860753, - "sentence_nr": 7 + "score": 0.06388295238713035, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", + "model": "mistralai/mistral-saba", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.17673835621668263, - "sentence_nr": 7 + "score": 0.013616214739412552, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", + "model": "mistralai/mistral-saba", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.3902085179927465, - "sentence_nr": 7 + "score": 0.0583054295241246, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.12351824822447698, - "sentence_nr": 7 + "score": 0.010659396622169299, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.3906003941586503, - "sentence_nr": 7 + "score": 0.06195563757831697, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.13635319583999642, - "sentence_nr": 7 + "score": 0.5091686102291805, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.2850432830231861, - "sentence_nr": 7 + "score": 0.7320513799881168, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.051272222858601425, - "sentence_nr": 7 + "score": 0.3297870801503319, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.21925629669878902, - "sentence_nr": 7 + "score": 0.5720518025066121, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.2730720849895809, - "sentence_nr": 7 + "score": 0.19851743023355672, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.4904647237477465, - "sentence_nr": 7 + "score": 0.547468247371695, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.20362195873137665, - "sentence_nr": 7 + "score": 0.341534333789316, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.4504603915919526, - "sentence_nr": 7 + "score": 0.5541872808406454, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 0.4024358402935841, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "score": 0.6233256624828218, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", + "model": "microsoft/phi-4", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.20927351091825444, - "sentence_nr": 7 + "score": 0.15222275337751698, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", + "model": "microsoft/phi-4", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.41232284529686536, - "sentence_nr": 7 + "score": 0.4114953475207692, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.006488743008712295, - "sentence_nr": 7 + "score": 0.12031041493621579, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.056679733231823716, - "sentence_nr": 7 + "score": 0.2540681992986826, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ru", + "bcp_47": "tr", "task": "translation_from", "metric": "bleu", - "score": 0.04209313835422283, - "sentence_nr": 7 + "score": 0.21910942711629067, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ru", + "bcp_47": "tr", "task": "translation_from", "metric": "chrf", - "score": 0.26913406771501547, - "sentence_nr": 7 + "score": 0.4572726847360035, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.15184278721506198, - "sentence_nr": 7 + "score": 0.17377261603583774, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.4093399937921707, - "sentence_nr": 7 + "score": 0.4342710497791623, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.15478222669012726, - "sentence_nr": 7 + "score": 0.15956483578595942, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.3550584759508654, - "sentence_nr": 7 + "score": 0.425693420655628, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.06244445123318812, - "sentence_nr": 7 + "score": 0.09791579531860735, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.28239834932587327, - "sentence_nr": 7 + "score": 0.40432986440529917, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.15685632649880807, - "sentence_nr": 7 + "score": 0.10423563468216913, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.34378295878971765, - "sentence_nr": 7 + "score": 0.34390006822858976, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.17513129766126637, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.47773919189902136, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.04731666745775026, - "sentence_nr": 7 + "score": 0.12211781276995526, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.263716605484278, - "sentence_nr": 7 + "score": 0.4347129560595505, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sw", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.06070088845782673, - "sentence_nr": 7 + "score": 0.07276375309803214, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sw", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.2584364364927186, - "sentence_nr": 7 + "score": 0.38861839385008856, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.06467646497347093, - "sentence_nr": 7 + "score": 0.14651860136741404, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.2374647159547877, - "sentence_nr": 7 + "score": 0.3998401696111528, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.21356649685679252, - "sentence_nr": 7 + "score": 0.12217872405854398, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.4243725761948095, - "sentence_nr": 7 + "score": 0.3801321934310697, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.05675489168243481, - "sentence_nr": 7 + "score": 0.11125382292406938, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.3098329822024127, - "sentence_nr": 7 + "score": 0.36362589237918785, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", + "model": "mistralai/mistral-saba", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 0.05786688295409777, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", + "model": "mistralai/mistral-saba", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "score": 0.3224621533587573, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.14459834065375157, - "sentence_nr": 7 + "score": 0.11912074202372534, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.4652483976219767, - "sentence_nr": 7 + "score": 0.2825717159828524, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.056417721736162135, - "sentence_nr": 7 + "score": 0.24759502840925565, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.14707146406788849, - "sentence_nr": 7 + "score": 0.5238368454821934, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.050577564370191244, - "sentence_nr": 7 + "score": 0.25797824300060007, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.26455598459911367, - "sentence_nr": 7 + "score": 0.5649286213774056, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.16758563722627876, - "sentence_nr": 7 + "score": 0.16285656455323885, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.4598125962895632, - "sentence_nr": 7 + "score": 0.41845720590580077, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.07875433150726119, - "sentence_nr": 7 + "score": 0.1752818941059842, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.2638954513805452, - "sentence_nr": 7 + "score": 0.4632619281274353, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.12334630141873701, - "sentence_nr": 7 + "score": 0.22659733169841595, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.3570869171580578, - "sentence_nr": 7 + "score": 0.5133896303093581, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.19153195331287226, - "sentence_nr": 7 + "score": 0.07198169585307328, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.4035796398628449, - "sentence_nr": 7 + "score": 0.34476508589802557, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.13346878790838718, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.30809134673210076, - "sentence_nr": 7 + "score": 0.06150895863726817, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", "task": "translation_from", "metric": "bleu", - "score": 0.1054433514098504, - "sentence_nr": 7 + "score": 0.20484322521277584, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", "task": "translation_from", "metric": "chrf", - "score": 0.2840946641780818, - "sentence_nr": 7 + "score": 0.4996688420059074, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.005649824351905227, - "sentence_nr": 7 + "score": 0.33631398011857205, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.09384599631616997, - "sentence_nr": 7 + "score": 0.6332428715049205, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.26356108382670784, - "sentence_nr": 7 + "score": 0.2323385180696658, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.47640029725892175, - "sentence_nr": 7 + "score": 0.5019509292309764, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.1738582449442553, - "sentence_nr": 7 + "score": 0.19726472415983368, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.40161714405254456, - "sentence_nr": 7 + "score": 0.5084335265908847, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 0.22897967367089514, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "score": 0.5158963534678644, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.16120676251405475, - "sentence_nr": 7 + "score": 0.3061554451423933, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.3934823211441987, - "sentence_nr": 7 + "score": 0.6130035189889428, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.006102253115653432, - "sentence_nr": 7 + "score": 0.23782678885662012, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.08614490649176082, - "sentence_nr": 7 + "score": 0.551668684772089, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.05052791122570277, - "sentence_nr": 7 + "score": 0.35205535634937346, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.25244788085139286, - "sentence_nr": 7 + "score": 0.5769772651090223, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.18041700926694673, - "sentence_nr": 7 + "score": 0.20310341961604592, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.43852448917973136, - "sentence_nr": 7 + "score": 0.42469202210051515, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.10734088848154077, - "sentence_nr": 7 + "score": 0.21910942711629067, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.33946796348247366, - "sentence_nr": 7 + "score": 0.41756166197217914, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.12499287263993265, - "sentence_nr": 7 + "score": 0.196046355324564, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.3031531068573407, - "sentence_nr": 7 + "score": 0.43350415347352517, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", + "model": "mistralai/mistral-saba", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.14318317227039934, - "sentence_nr": 7 + "score": 0.1927168036916521, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", + "model": "mistralai/mistral-saba", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.356756117753337, - "sentence_nr": 7 + "score": 0.4181794106251381, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "de", + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.059890467169145326, - "sentence_nr": 7 + "score": 0.1998527160015335, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "de", + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.2852233544262429, - "sentence_nr": 7 + "score": 0.31962498795801614, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.12768613576122964, - "sentence_nr": 7 + "score": 0.42962975172642387, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.3279857505284436, - "sentence_nr": 7 + "score": 0.6526674280609184, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 + "score": 0.411677998679592, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.05915285533036862, - "sentence_nr": 7 + "score": 0.6679764037281967, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.2529913062741307, - "sentence_nr": 7 + "score": 0.19057582910221915, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.5047314299093542, - "sentence_nr": 7 + "score": 0.44257167922089413, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.15720527174368754, - "sentence_nr": 7 + "score": 0.2799506947952143, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.4715103005986015, - "sentence_nr": 7 + "score": 0.5700777642587023, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 0.2799506947952143, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "score": 0.5714878050026524, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", + "model": "microsoft/phi-4", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.1853793533058344, - "sentence_nr": 7 + "score": 0.1958641343177127, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", + "model": "microsoft/phi-4", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.47839321418703307, - "sentence_nr": 7 + "score": 0.42370679353391144, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 7 + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.21349841283886073, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "de", + "bcp_47": "ko", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 + "score": 0.2904194745959351, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "de", + "bcp_47": "ko", "task": "translation_from", "metric": "chrf", - "score": 0.27204846616025496, - "sentence_nr": 7 + "score": 0.5399262338172586, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.15197436941722972, - "sentence_nr": 7 + "score": 0.2453238227047589, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.37271000364127155, - "sentence_nr": 7 + "score": 0.55017080577881, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.17795920517030017, - "sentence_nr": 7 + "score": 0.22952177306405494, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.41862955401967455, - "sentence_nr": 7 + "score": 0.5279520952576137, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.17060644184287996, - "sentence_nr": 7 + "score": 0.2950615456579434, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.4054584763100862, - "sentence_nr": 7 + "score": 0.5675143775417766, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.17670199390439656, - "sentence_nr": 7 + "score": 0.18643403650822063, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.36682227371085463, - "sentence_nr": 7 + "score": 0.5048280010205698, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.3466720379073123, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.6312259441479694, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.11509582662256548, - "sentence_nr": 7 + "score": 0.2363347557282754, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.2567415288537121, - "sentence_nr": 7 + "score": 0.5588648358676678, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ja", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.16136987880724096, - "sentence_nr": 7 + "score": 0.21108332811806296, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ja", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.33626920748765377, - "sentence_nr": 7 + "score": 0.5847750744232335, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.05468777721214362, - "sentence_nr": 7 + "score": 0.1490671937611913, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.2495519218392036, - "sentence_nr": 7 + "score": 0.4338972266367216, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.2700569573710134, - "sentence_nr": 7 + "score": 0.1490671937611913, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.4456029881233736, - "sentence_nr": 7 + "score": 0.4338972266367216, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.2195372587354865, - "sentence_nr": 7 + "score": 0.1543252261021413, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.3664303672465512, - "sentence_nr": 7 + "score": 0.3797469086855575, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", + "model": "mistralai/mistral-saba", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 0.1649662542496744, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", + "model": "mistralai/mistral-saba", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "score": 0.4276666255930192, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", + "model": "mistralai/mistral-nemo", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.17524367912943578, - "sentence_nr": 7 + "score": 0.1409879074813521, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", + "model": "mistralai/mistral-nemo", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.3908643084796051, - "sentence_nr": 7 + "score": 0.31162348789027916, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.018807992767181335, - "sentence_nr": 7 + "score": 0.005449161724399305, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.0887797545718027, - "sentence_nr": 7 + "score": 0.026158029267484995, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.06437840881729344, - "sentence_nr": 7 + "score": 0.27217589854489177, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.26576141148273813, - "sentence_nr": 7 + "score": 0.5875203796536156, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.598931508663349, - "sentence_nr": 8 + "score": 0.16617179744038174, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.7353063745802827, - "sentence_nr": 8 + "score": 0.48117026601244495, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.30677064886592076, - "sentence_nr": 8 + "score": 0.23584494013034235, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.5308555945242818, - "sentence_nr": 8 + "score": 0.5389135951883217, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "model": "deepseek/deepseek-chat", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.1327526847508867, - "sentence_nr": 8 + "score": 0.3317416308285443, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "model": "deepseek/deepseek-chat", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.37850602486495205, - "sentence_nr": 8 + "score": 0.6310870941590575, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "model": "microsoft/phi-4", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.18405035438430847, - "sentence_nr": 8 + "score": 0.1606777496729309, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "model": "microsoft/phi-4", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.4142901090120915, - "sentence_nr": 8 + "score": 0.5196854930551574, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "en", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.11719733243527225, - "sentence_nr": 8 + "score": 0.011961593826815614, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "en", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.4023449434976014, - "sentence_nr": 8 + "score": 0.10171812934151993, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "amazon/nova-micro-v1", + "bcp_47": "it", "task": "translation_from", "metric": "bleu", - "score": 0.13410301071131794, - "sentence_nr": 8 + "score": 0.27271804425850804, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "amazon/nova-micro-v1", + "bcp_47": "it", "task": "translation_from", "metric": "chrf", - "score": 0.3942932268034351, - "sentence_nr": 8 + "score": 0.6231583014699292, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.24239458593560292, - "sentence_nr": 8 + "score": 0.2063529291350913, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.32069132319909655, - "sentence_nr": 8 + "score": 0.41364248023079064, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.47039611627724026, - "sentence_nr": 8 + "score": 0.3618488169166299, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.6298835480734482, - "sentence_nr": 8 + "score": 0.5708179622131996, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.22478613858269392, - "sentence_nr": 8 + "score": 0.44536846829231563, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.44348101018104913, - "sentence_nr": 8 + "score": 0.626601305779226, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 8 + "score": 0.45286243450930924, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 + "score": 0.5635810887606836, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.4026159305424288, - "sentence_nr": 8 + "score": 0.35484390943924515, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.5712560131047175, - "sentence_nr": 8 + "score": 0.6152611695652526, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.30829953663635573, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.183687049781416, - "sentence_nr": 8 + "score": 0.5552978544646301, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.351911486970854, - "sentence_nr": 8 + "score": 0.3520774812078196, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.5181825846579515, - "sentence_nr": 8 + "score": 0.5735788202105873, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.4262221594184117, - "sentence_nr": 8 + "score": 0.24664751641319077, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.5886657414856064, - "sentence_nr": 8 + "score": 0.49266142807316493, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.286608441075188, - "sentence_nr": 8 + "score": 0.24715873794308874, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.4579283646292802, - "sentence_nr": 8 + "score": 0.4876485132692386, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.25861130592298187, - "sentence_nr": 8 + "score": 0.2336554865490948, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.39452644092432093, - "sentence_nr": 8 + "score": 0.38695981569355575, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "mistralai/mistral-saba", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.20379250618355427, - "sentence_nr": 8 + "score": 0.23374260773620212, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "mistralai/mistral-saba", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.41085414309816914, - "sentence_nr": 8 + "score": 0.4481831080646353, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.20644156444875764, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.24522672479887386, - "sentence_nr": 8 + "score": 0.2756944940230347, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.17328174803055044, - "sentence_nr": 8 + "score": 0.3563758622144919, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.3178268797869574, - "sentence_nr": 8 + "score": 0.6152190692949244, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.2990226215771518, - "sentence_nr": 8 + "score": 0.3602927398528427, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.4323734152924571, - "sentence_nr": 8 + "score": 0.6148633329091892, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.44212827861876997, - "sentence_nr": 8 + "score": 0.20241924705494113, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.5843055590705747, - "sentence_nr": 8 + "score": 0.4785445547362629, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.10434360980785336, - "sentence_nr": 8 + "score": 0.3291745670182042, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.3012789660952507, - "sentence_nr": 8 + "score": 0.5666889547785301, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 8 + "score": 0.3561691349239915, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 + "score": 0.5927193326481119, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "model": "microsoft/phi-4", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.17248715680799764, - "sentence_nr": 8 + "score": 0.269865498431692, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "model": "microsoft/phi-4", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.40043565243219187, - "sentence_nr": 8 + "score": 0.4414378099856194, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.2026004770366011, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.13835317113453516, - "sentence_nr": 8 + "score": 0.30270818881631195, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation_from", "metric": "bleu", - "score": 0.16343842313572918, - "sentence_nr": 8 + "score": 0.27353798204430885, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation_from", "metric": "chrf", - "score": 0.3986641525285075, - "sentence_nr": 8 + "score": 0.4703704190720731, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.43186481103649477, - "sentence_nr": 8 + "score": 0.10586140133972588, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.5792139686527714, - "sentence_nr": 8 + "score": 0.4674053477944039, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.30890092021323623, - "sentence_nr": 8 + "score": 0.1712766252338756, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.5553909583113487, - "sentence_nr": 8 + "score": 0.5225554962608486, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.30890092021323623, - "sentence_nr": 8 + "score": 0.1549337617358287, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.5553909583113487, - "sentence_nr": 8 + "score": 0.47063780888858964, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.25798723088167685, - "sentence_nr": 8 + "score": 0.087593103737711, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.5244854229988815, - "sentence_nr": 8 + "score": 0.41197148322773003, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.24517917828823954, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.5630094449221897, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.30890092021323623, - "sentence_nr": 8 + "score": 0.236963478377094, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.5676965183365866, - "sentence_nr": 8 + "score": 0.5659243374651276, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.3315037521841549, - "sentence_nr": 8 + "score": 0.1506914981676572, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.468197879470805, - "sentence_nr": 8 + "score": 0.49409850038698094, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.460474309246715, - "sentence_nr": 8 + "score": 0.13731102930446024, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.6213537794704693, - "sentence_nr": 8 + "score": 0.4144705269436304, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.43186481103649477, - "sentence_nr": 8 + "score": 0.13731102930446024, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.5834549494301647, - "sentence_nr": 8 + "score": 0.4144705269436304, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.30890092021323623, - "sentence_nr": 8 + "score": 0.09993298280365949, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.5676965183365866, - "sentence_nr": 8 + "score": 0.48180385986334856, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", + "model": "mistralai/mistral-saba", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 8 + "score": 0.11846592694382017, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", + "model": "mistralai/mistral-saba", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 + "score": 0.3317492265355767, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.09716516292186668, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 + "score": 0.27400718580798855, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 8 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.003172770121174655, - "sentence_nr": 8 + "score": 0.016381257887983454, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.43186481103649477, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.5834549494301647, - "sentence_nr": 8 + "score": 0.0197185083394535, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.46092611919700416, - "sentence_nr": 8 + "score": 0.11760297043792217, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.6365915338629015, - "sentence_nr": 8 + "score": 0.5037771891801089, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.1690979933029136, - "sentence_nr": 8 + "score": 0.31178681104808115, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.3751861276375209, - "sentence_nr": 8 + "score": 0.6235184561135673, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.39029400374523116, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.3344305108778801, - "sentence_nr": 8 + "score": 0.6594442432367803, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "model": "microsoft/phi-4", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.1028840244182704, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "model": "microsoft/phi-4", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.37017501464955627, - "sentence_nr": 8 + "score": 0.3793737661689232, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "es", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.23874935220945062, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "es", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.4641140866580373, - "sentence_nr": 8 + "score": 0.0054987473538930624, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", "task": "translation_from", "metric": "bleu", - "score": 0.22218130727359342, - "sentence_nr": 8 + "score": 0.18502495276209577, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", "task": "translation_from", "metric": "chrf", - "score": 0.39929356245904674, - "sentence_nr": 8 + "score": 0.49774903659564634, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.3194331635465395, - "sentence_nr": 8 + "score": 0.2703645496410475, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.5031092445628172, - "sentence_nr": 8 + "score": 0.5129310433304475, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.4094746713677566, - "sentence_nr": 8 + "score": 0.2709079038456153, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.5831913487431972, - "sentence_nr": 8 + "score": 0.447458019441992, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.334422418242443, - "sentence_nr": 8 + "score": 0.27075075499555246, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.47577086062579566, - "sentence_nr": 8 + "score": 0.5079958750910802, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 8 + "score": 0.19134492872562123, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 + "score": 0.4261251088174477, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.4482907809719588, - "sentence_nr": 8 + "score": 0.28450701980252824, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.5498272118133005, - "sentence_nr": 8 + "score": 0.5183351563352739, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.2772655014585435, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.02467424260792568, - "sentence_nr": 8 + "score": 0.5163486940590201, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.21902340561392236, - "sentence_nr": 8 + "score": 0.2797290030028961, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.43906671679239717, - "sentence_nr": 8 + "score": 0.5092945860838002, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.5234484809182233, - "sentence_nr": 8 + "score": 0.17938074771051948, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.6658297773613274, - "sentence_nr": 8 + "score": 0.37004164663464306, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.2840563956846642, - "sentence_nr": 8 + "score": 0.18380806980705947, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.5110250591004448, - "sentence_nr": 8 + "score": 0.37732671671330364, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.30007504691018483, - "sentence_nr": 8 + "score": 0.043167422631559454, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.5153810823423555, - "sentence_nr": 8 + "score": 0.14513634182269314, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "model": "mistralai/mistral-saba", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.14074957769288798, - "sentence_nr": 8 + "score": 0.1511810638395761, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "model": "mistralai/mistral-saba", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.3750035199199742, - "sentence_nr": 8 + "score": 0.38240796284180606, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.2305720744624697, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.4655848022427904, - "sentence_nr": 8 + "score": 0.1623128779243406, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.26538706048179084, - "sentence_nr": 8 + "score": 0.005454501428722432, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.4982627378595717, - "sentence_nr": 8 + "score": 0.02525205404411707, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.232738415750697, - "sentence_nr": 8 + "score": 0.24420961057522048, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.49618971681248764, - "sentence_nr": 8 + "score": 0.529409842751342, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.45479124441660884, - "sentence_nr": 8 + "score": 0.20787389114353938, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.5872444902217987, - "sentence_nr": 8 + "score": 0.5395612843354369, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.3488611533620711, - "sentence_nr": 8 + "score": 0.19107912313367556, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.5550499651473632, - "sentence_nr": 8 + "score": 0.47457026427329674, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 8 + "score": 0.2482239530137079, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 + "score": 0.5350941461126912, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "model": "microsoft/phi-4", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.31883477089875656, - "sentence_nr": 8 + "score": 0.06104408840180951, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "model": "microsoft/phi-4", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.5510450101159524, - "sentence_nr": 8 + "score": 0.2770877905448079, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.18227211511988975, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.038236956722392024, - "sentence_nr": 8 + "score": 0.26739926509879147, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", + "bcp_47": "gu", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.2244921781886412, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", + "bcp_47": "gu", "task": "translation_from", "metric": "chrf", - "score": 0.31998097041178836, - "sentence_nr": 8 + "score": 0.48547869147117434, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.3815250264738168, - "sentence_nr": 8 + "score": 0.2492031334256811, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.6516314751979607, - "sentence_nr": 8 + "score": 0.4923163374806021, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.19920413481788912, - "sentence_nr": 8 + "score": 0.2907608105126149, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.42537796926163113, - "sentence_nr": 8 + "score": 0.5445465034944268, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.189902924205034, - "sentence_nr": 8 + "score": 0.30219157030008637, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.4072184389907138, - "sentence_nr": 8 + "score": 0.5492288689573782, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.18710260593933364, - "sentence_nr": 8 + "score": 0.2166046272179384, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.44334313717706003, - "sentence_nr": 8 + "score": 0.5246154268333804, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.20498415630763028, - "sentence_nr": 8 + "score": 0.4173623671609102, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.41559166314823337, - "sentence_nr": 8 + "score": 0.653946816582661, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.15161074985415177, - "sentence_nr": 8 + "score": 0.34324291276089697, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.3796830006266126, - "sentence_nr": 8 + "score": 0.6000833980598983, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.07757069009917116, - "sentence_nr": 8 + "score": 0.26036802768146033, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.3253161209971999, - "sentence_nr": 8 + "score": 0.5255752089611478, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.533167536340577, - "sentence_nr": 8 + "score": 0.1545474372263506, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.6607380945724757, - "sentence_nr": 8 + "score": 0.4385517123611144, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.22168992033645996, - "sentence_nr": 8 + "score": 0.12611435407515667, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.523689661176845, - "sentence_nr": 8 + "score": 0.416865407084071, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 8 + "score": 0.17714787947168362, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 + "score": 0.4959629506636555, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "model": "mistralai/mistral-saba", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.2722704374402053, - "sentence_nr": 8 + "score": 0.15979493106715062, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", + "model": "mistralai/mistral-saba", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.596004654894533, - "sentence_nr": 8 + "score": 0.39705750344710744, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", + "model": "mistralai/mistral-nemo", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.11805334969075956, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", + "model": "mistralai/mistral-nemo", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.01008902035184167, - "sentence_nr": 8 + "score": 0.29355162288859277, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.10279947040838337, - "sentence_nr": 8 + "score": 0.4427450627622687, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.3569840483632983, - "sentence_nr": 8 + "score": 0.5831413075310591, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.40003810431098236, - "sentence_nr": 8 + "score": 0.4980380502241239, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.5899097408105687, - "sentence_nr": 8 + "score": 0.716927044471585, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "google/gemma-3-27b-it", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.20401796878756984, - "sentence_nr": 8 + "score": 0.24793827875738764, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "google/gemma-3-27b-it", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.43317630453631556, - "sentence_nr": 8 + "score": 0.5551406879143232, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.2097387761551816, - "sentence_nr": 8 + "score": 0.5143871785925975, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.49663301508497226, - "sentence_nr": 8 + "score": 0.7463887744436826, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "model": "deepseek/deepseek-chat", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.27067168022307464, - "sentence_nr": 8 + "score": 0.3817026511181546, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "model": "deepseek/deepseek-chat", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.5439625482235064, - "sentence_nr": 8 + "score": 0.6579835999035872, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", + "model": "microsoft/phi-4", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.40066361284662694, - "sentence_nr": 8 + "score": 0.09375485531078004, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", + "model": "microsoft/phi-4", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.5020331850026643, - "sentence_nr": 8 + "score": 0.38990742377079385, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.18679710353734788, - "sentence_nr": 8 + "score": 0.1568616676699092, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.3876457319870774, - "sentence_nr": 8 + "score": 0.3333409033359294, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "model": "amazon/nova-micro-v1", + "bcp_47": "th", "task": "translation_from", "metric": "bleu", - "score": 0.2826204057042236, - "sentence_nr": 8 + "score": 0.2256379391347521, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "model": "amazon/nova-micro-v1", + "bcp_47": "th", "task": "translation_from", "metric": "chrf", - "score": 0.5043062352893725, - "sentence_nr": 8 + "score": 0.4863283179636851, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.36330554542576643, - "sentence_nr": 8 + "score": 0.43104504141832617, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.5680915937757648, - "sentence_nr": 8 + "score": 0.5953439401847398, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.29588994069727786, - "sentence_nr": 8 + "score": 0.27907188689389983, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.5527117669081858, - "sentence_nr": 8 + "score": 0.5093017176589221, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 8 + "score": 0.2276330877377012, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 + "score": 0.43289197888514347, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.46732353406180216, - "sentence_nr": 8 + "score": 0.28652679283739385, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.6059276585345114, - "sentence_nr": 8 + "score": 0.4341260827393413, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.29512789240986326, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.06266083709457643, - "sentence_nr": 8 + "score": 0.490746523794099, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.36565527196849945, - "sentence_nr": 8 + "score": 0.2425369392285065, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.4882803186347697, - "sentence_nr": 8 + "score": 0.46048415870417647, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.38047531731529327, - "sentence_nr": 8 + "score": 0.240340920378981, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.49485723102957346, - "sentence_nr": 8 + "score": 0.4624667456597986, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.2044887070217883, - "sentence_nr": 8 + "score": 0.06452288310243297, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.38471585132587544, - "sentence_nr": 8 + "score": 0.1340641152212824, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.2309552734743087, - "sentence_nr": 8 + "score": 0.06452288310243297, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.4672309378181727, - "sentence_nr": 8 + "score": 0.1340641152212824, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.21815383167015925, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.29886658673327365, - "sentence_nr": 8 + "score": 0.37444773636111656, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", + "model": "mistralai/mistral-saba", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.12729992303943352, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", + "model": "mistralai/mistral-saba", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.18485608959451436, - "sentence_nr": 8 + "score": 0.34123483868512156, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.21812881407613688, - "sentence_nr": 8 + "score": 0.2339053691840767, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.3598346059855135, - "sentence_nr": 8 + "score": 0.34360669379010167, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.1367498402979849, - "sentence_nr": 8 + "score": 0.33366586275504884, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.3943841419148219, - "sentence_nr": 8 + "score": 0.597451728834618, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.3890858187033785, - "sentence_nr": 8 + "score": 0.3239689744606511, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.517430128986546, - "sentence_nr": 8 + "score": 0.5878660875539871, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.5676006714726635, - "sentence_nr": 8 + "score": 0.22991579208155866, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.6880701448812352, - "sentence_nr": 8 + "score": 0.4195683527550329, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 8 + "score": 0.3529455532976322, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 + "score": 0.5455305267266531, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.2974074484950165, - "sentence_nr": 8 + "score": 0.35730397938750963, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.5121581247515657, - "sentence_nr": 8 + "score": 0.5470354051417882, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.09777262451497228, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3252901824093446, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.2119156724475127, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.010162846529607748, - "sentence_nr": 8 + "score": 0.37835612878198044, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", + "bcp_47": "kn", "task": "translation_from", "metric": "bleu", - "score": 0.21947959999379651, - "sentence_nr": 8 + "score": 0.23150355132919254, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", + "bcp_47": "kn", "task": "translation_from", "metric": "chrf", - "score": 0.3226457008913864, - "sentence_nr": 8 + "score": 0.48517162463112556, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.4489235959690452, - "sentence_nr": 8 + "score": 0.20863984464930022, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.5934678825154104, - "sentence_nr": 8 + "score": 0.45879801940552783, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.2980504190448601, - "sentence_nr": 8 + "score": 0.2695149221768555, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.5101268920225042, - "sentence_nr": 8 + "score": 0.4713033964653895, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.28800869328515505, - "sentence_nr": 8 + "score": 0.08839914053546608, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.49348678623542436, - "sentence_nr": 8 + "score": 0.13210046935115544, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.2429163097293302, - "sentence_nr": 8 + "score": 0.22669629371608005, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.5044329486461447, - "sentence_nr": 8 + "score": 0.4780713176952279, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.3497701087305086, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.5444479222621443, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.3026681380066168, - "sentence_nr": 8 + "score": 0.321796895215673, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.492659495510912, - "sentence_nr": 8 + "score": 0.5555275088172927, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.24586918158076287, - "sentence_nr": 8 + "score": 0.35592474790742606, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.4658595745396681, - "sentence_nr": 8 + "score": 0.5565115125775245, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.23073085454808062, - "sentence_nr": 8 + "score": 0.18553468405289997, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.44142087654422146, - "sentence_nr": 8 + "score": 0.3940963548952763, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.43218646131638366, - "sentence_nr": 8 + "score": 0.19049091104611224, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.5402913319043152, - "sentence_nr": 8 + "score": 0.40060792914131615, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.4406612884550454, - "sentence_nr": 8 + "score": 0.2062051322624683, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.5676112112992767, - "sentence_nr": 8 + "score": 0.41192098101671093, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", + "model": "mistralai/mistral-saba", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 8 + "score": 0.18379651914635886, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", + "model": "mistralai/mistral-saba", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 + "score": 0.36124868671762816, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.4476950425126913, - "sentence_nr": 8 + "score": 0.016680172518945505, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.5932980209045412, - "sentence_nr": 8 + "score": 0.049377315765891015, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.005449161724399305, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.1483315516064897, - "sentence_nr": 8 + "score": 0.026158029267484995, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.25249051585915977, - "sentence_nr": 8 + "score": 0.38565864734759825, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.38558450790399557, - "sentence_nr": 8 + "score": 0.6607139809585929, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.46832763312452297, - "sentence_nr": 8 + "score": 0.2505547110465864, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.8176110134774669, - "sentence_nr": 8 + "score": 0.48036570052288885, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.9436043261706615, - "sentence_nr": 8 + "score": 0.37294595046144213, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.9880191679951993, - "sentence_nr": 8 + "score": 0.598127662881266, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 8 + "score": 0.35451444797329384, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 8 + "score": 0.5965971449645624, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "model": "microsoft/phi-4", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.15146825617322526, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", + "model": "microsoft/phi-4", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 0.0067104198717751464, - "sentence_nr": 8 + "score": 0.33785686884467514, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 8 + "score": 0.0034593773364647584, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 8 + "score": 0.10587910341470286, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 8 + "score": 0.3295957765387521, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 8 + "score": 0.5207914581240252, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.9025232868361638, - "sentence_nr": 8 + "score": 0.27338789256007584, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.9169897590736298, - "sentence_nr": 8 + "score": 0.5429269981031598, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 8 + "score": 0.2075953797357176, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 8 + "score": 0.4344742362498603, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.9709835434146469, - "sentence_nr": 8 + "score": 0.293816771214877, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.9951728990866464, - "sentence_nr": 8 + "score": 0.4842449940538771, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 8 + "score": 0.2063529291350913, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 + "score": 0.48123766976272336, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.9154051169199643, - "sentence_nr": 8 + "score": 0.41737386808061633, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.9757471794927451, - "sentence_nr": 8 + "score": 0.6106512856048538, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.8935248372106969, - "sentence_nr": 8 + "score": 0.3268233487541633, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.9404428602061264, - "sentence_nr": 8 + "score": 0.5342805780357801, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 8 + "score": 0.27075075499555246, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 8 + "score": 0.5201548999535662, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.3642482472579296, - "sentence_nr": 8 + "score": 0.2306483512507339, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.5342538783335161, - "sentence_nr": 8 + "score": 0.45504570030979885, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.18831933500600306, - "sentence_nr": 8 + "score": 0.23095037282123296, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.4318025704181776, - "sentence_nr": 8 + "score": 0.39553614028855416, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.4439623527529193, - "sentence_nr": 8 + "score": 0.11546772122737221, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.5309137918519957, - "sentence_nr": 8 + "score": 0.2604812246395473, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", + "model": "mistralai/mistral-saba", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.09815096202645017, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", + "model": "mistralai/mistral-saba", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.2628849077177109, - "sentence_nr": 8 + "score": 0.08571854412510894, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", + "model": "mistralai/mistral-nemo", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.34613789243685805, - "sentence_nr": 8 + "score": 0.1334566096462065, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", + "model": "mistralai/mistral-nemo", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.49330323192607783, - "sentence_nr": 8 + "score": 0.25537293849808335, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.21039673882735752, - "sentence_nr": 8 + "score": 0.005449161724399305, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.3872019296036794, - "sentence_nr": 8 + "score": 0.026158029267484995, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.17879309995151985, - "sentence_nr": 8 + "score": 0.4175984139783788, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.2568045428196672, - "sentence_nr": 8 + "score": 0.6502955433874209, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.43218646131638366, - "sentence_nr": 8 + "score": 0.26709890828869226, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.5369715393520321, - "sentence_nr": 8 + "score": 0.5611797797204635, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.6099084961389527, - "sentence_nr": 8 + "score": 0.3658141331541051, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.658015760514539, - "sentence_nr": 8 + "score": 0.6386889736882309, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", + "model": "deepseek/deepseek-chat", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 8 + "score": 0.3748533897614559, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", + "model": "deepseek/deepseek-chat", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 + "score": 0.6415822793896023, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", + "model": "microsoft/phi-4", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.2464380578618272, - "sentence_nr": 8 + "score": 0.17001078098404232, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", + "model": "microsoft/phi-4", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.4278399263644655, - "sentence_nr": 8 + "score": 0.401579352670784, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.07964662206989197, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.1879604201975219, - "sentence_nr": 8 + "score": 0.08209382469898788, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ru", + "bcp_47": "or", "task": "translation_from", "metric": "bleu", - "score": 0.11956615218925931, - "sentence_nr": 8 + "score": 0.24759502840925565, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ru", + "bcp_47": "or", "task": "translation_from", "metric": "chrf", - "score": 0.3305337714496588, - "sentence_nr": 8 + "score": 0.5330580248133261, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.28406136898728457, - "sentence_nr": 8 + "score": 0.3703971546860334, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.5649283064490618, - "sentence_nr": 8 + "score": 0.6509854048597393, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.21544027588567594, - "sentence_nr": 8 + "score": 0.3020089249326176, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.5040038440508637, - "sentence_nr": 8 + "score": 0.5666791239956741, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.3169340575963432, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.28552127890094825, - "sentence_nr": 8 + "score": 0.6047772126282382, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.2728224724839342, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.16448947606185552, - "sentence_nr": 8 + "score": 0.5858336859170117, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.30630098078522544, - "sentence_nr": 8 + "score": 0.3468503425098983, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.506196410096354, - "sentence_nr": 8 + "score": 0.6177731406412544, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.273568639390329, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.3395693620772222, - "sentence_nr": 8 + "score": 0.5735412935013542, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.11823053204772466, - "sentence_nr": 8 + "score": 0.2666372228396489, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.24615921057796505, - "sentence_nr": 8 + "score": 0.5839132669613946, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.4955056754877292, - "sentence_nr": 8 + "score": 0.1676495122493199, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.6304896503844739, - "sentence_nr": 8 + "score": 0.4477014617496043, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.3491726680217181, - "sentence_nr": 8 + "score": 0.17893757508901514, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.4946434087697324, - "sentence_nr": 8 + "score": 0.4768883480817015, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 8 + "score": 0.1649662542496744, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 + "score": 0.44732894301721, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", + "model": "mistralai/mistral-saba", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.22916123454514536, - "sentence_nr": 8 + "score": 0.19049091104611224, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", + "model": "mistralai/mistral-saba", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.4192305796685782, - "sentence_nr": 8 + "score": 0.442891478328311, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.19834976253918618, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.03586767012087445, - "sentence_nr": 8 + "score": 0.41909810865014857, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.10393938326032184, - "sentence_nr": 8 + "score": 0.005449161724399305, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.33864898055191395, - "sentence_nr": 8 + "score": 0.026158029267484995, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.23843418577408987, - "sentence_nr": 8 + "score": 0.005449161724399305, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.4082320855803597, - "sentence_nr": 8 + "score": 0.026158029267484995, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.26970223719007375, - "sentence_nr": 8 + "score": 0.20812209921683228, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.5172978597562362, - "sentence_nr": 8 + "score": 0.4762583476044399, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.2372622545962587, - "sentence_nr": 8 + "score": 0.30140436874237964, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.49004864454711367, - "sentence_nr": 8 + "score": 0.6234338871585586, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.38002588146683836, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.190140357671548, - "sentence_nr": 8 + "score": 0.6549174592735642, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "id", + "model": "microsoft/phi-4", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.16776974914462364, - "sentence_nr": 8 + "score": 0.26004850047646383, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "id", + "model": "microsoft/phi-4", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.39918336227233053, - "sentence_nr": 8 + "score": 0.5795275012848826, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.22556860731509948, - "sentence_nr": 8 + "score": 0.13582344277578873, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.4747086049005634, - "sentence_nr": 8 + "score": 0.23233050093887114, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", "task": "translation_from", "metric": "bleu", - "score": 0.23647235972003527, - "sentence_nr": 8 + "score": 0.27045803893058445, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", "task": "translation_from", "metric": "chrf", - "score": 0.41650969469918997, - "sentence_nr": 8 + "score": 0.5815404493073867, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.4067965564160296, - "sentence_nr": 8 + "score": 0.1515551103099189, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.5347497635970215, - "sentence_nr": 8 + "score": 0.49455791760408774, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.42662911848025076, - "sentence_nr": 8 + "score": 0.1059786102229136, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.5800596652250789, - "sentence_nr": 8 + "score": 0.2561557976916047, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 8 + "score": 0.15507100728722165, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 + "score": 0.48322409198286276, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.39420326688847324, - "sentence_nr": 8 + "score": 0.14889095388455822, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.6371076304605184, - "sentence_nr": 8 + "score": 0.41536400762130277, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.3800528767347385, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.030501743754356173, - "sentence_nr": 8 + "score": 0.5867504754619237, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.19352792845274666, - "sentence_nr": 8 + "score": 0.3057322895905473, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.3925864519770825, - "sentence_nr": 8 + "score": 0.5337455378139571, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.49458876622696707, - "sentence_nr": 8 + "score": 0.2667836062177809, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.6179893617801274, - "sentence_nr": 8 + "score": 0.4889374373828587, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.30630098078522544, - "sentence_nr": 8 + "score": 0.12869567424741998, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.5439056051092116, - "sentence_nr": 8 + "score": 0.2959351858751658, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.3059872016765634, - "sentence_nr": 8 + "score": 0.12870376210497989, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.5499457869553984, - "sentence_nr": 8 + "score": 0.2961496632884161, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.20076347441707354, - "sentence_nr": 8 + "score": 0.12486557620383446, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.49625515445592083, - "sentence_nr": 8 + "score": 0.2904789102327634, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "de", + "model": "mistralai/mistral-saba", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.33626819961829335, - "sentence_nr": 8 + "score": 0.1059786102229136, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "de", + "model": "mistralai/mistral-saba", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.5531226519754557, - "sentence_nr": 8 + "score": 0.2561557976916047, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.25449674462950855, - "sentence_nr": 8 + "score": 0.10908370302374089, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.44805409822643144, - "sentence_nr": 8 + "score": 0.24648500279736443, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.18665948437666813, - "sentence_nr": 8 + "score": 0.36644357745505135, + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.4437597552815582, - "sentence_nr": 8 + "score": 0.6024962574387346, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.5004894768464765, - "sentence_nr": 8 + "score": 0.2934447092159934, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.6103735933609515, - "sentence_nr": 8 + "score": 0.6498290192480378, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "de", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.48670274592792, - "sentence_nr": 8 + "score": 0.13551668809076822, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "de", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.6717971788322309, - "sentence_nr": 8 + "score": 0.4424571214083723, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.2697482929758505, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.08002354055277362, - "sentence_nr": 8 + "score": 0.6331597127209819, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.417372155782838, - "sentence_nr": 8 + "score": 0.33047557311918846, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.5043502592801646, - "sentence_nr": 8 + "score": 0.6190675011020178, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.05327003793015713, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.23036636097561497, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.10369816700638204, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.06680433144407034, - "sentence_nr": 8 + "score": 0.2527691069954848, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "de", + "bcp_47": "ha", "task": "translation_from", "metric": "bleu", - "score": 0.2919280798407827, - "sentence_nr": 8 + "score": 0.11930191477839873, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "de", + "bcp_47": "ha", "task": "translation_from", "metric": "chrf", - "score": 0.4076170046499833, - "sentence_nr": 8 + "score": 0.26295403442210813, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.41520313827696, - "sentence_nr": 8 + "score": 0.24664751641319077, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.6485212540886613, - "sentence_nr": 8 + "score": 0.48702383483350364, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.19850842371858787, - "sentence_nr": 8 + "score": 0.2103019561790119, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.43584341835040474, - "sentence_nr": 8 + "score": 0.4375454771782611, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.284161309400485, - "sentence_nr": 8 + "score": 0.21396075329540654, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.5096201523229312, - "sentence_nr": 8 + "score": 0.5173735729399421, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.1134451991138546, - "sentence_nr": 8 + "score": 0.22941797870527758, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.4186167762559285, - "sentence_nr": 8 + "score": 0.42186981875418683, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.32687808175061417, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.5888362088090499, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.3228858965814099, - "sentence_nr": 8 + "score": 0.3845928641813324, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.4495802766763041, - "sentence_nr": 8 + "score": 0.6331548374111277, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.3312570339636223, - "sentence_nr": 8 + "score": 0.23919877618601593, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.45442661484375735, - "sentence_nr": 8 + "score": 0.5302876334280949, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.1992314067597761, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.38794562922191417, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.20711840252285554, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.4418611388914859, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.24088562704853508, - "sentence_nr": 8 + "score": 0.1465911128169728, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.3796021685415706, - "sentence_nr": 8 + "score": 0.3079988190146739, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.17935599848863806, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.3590687067479934, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.1069893156459595, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.21361888501132265, + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.4317853842116786, - "sentence_nr": 8 + "score": 0.37042346597404774, + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.5396321094501078, - "sentence_nr": 8 + "score": 0.6504303479968405, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.43874832905672956, - "sentence_nr": 8 + "score": 0.24527664931927695, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.5882858748700781, - "sentence_nr": 8 + "score": 0.5385267265433172, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.2736255491551285, + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.26652403565303173, - "sentence_nr": 8 + "score": 0.5566377410597074, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.2940297877008057, + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.3270207865532903, - "sentence_nr": 8 + "score": 0.542815022290297, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.28847676899197566, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.49694236091528354, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.1509387354925112, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.26939418948215393, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.06203726059862019, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.006769280526888359, - "sentence_nr": 8 + "score": 0.06824072321166202, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "bleu", - "score": 0.35465713644381464, - "sentence_nr": 8 + "score": 0.2119156724475127, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ja", + "bcp_47": "sd", "task": "translation_from", "metric": "chrf", - "score": 0.4911561718424494, - "sentence_nr": 8 + "score": 0.48896874501469645, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.18559542135951204, - "sentence_nr": 9 + "score": 0.7281051247089317, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.3804842882867387, - "sentence_nr": 9 + "score": 0.7882997401328445, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.5806197937310393, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.35369375385786006, - "sentence_nr": 9 + "score": 0.7346706700987636, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.13087682931309413, - "sentence_nr": 9 + "score": 0.5793367580502561, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.19462952976787054, - "sentence_nr": 9 + "score": 0.6502428441722727, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.4855332614117322, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.013538497707846785, - "sentence_nr": 9 + "score": 0.5299556742893647, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.5411953360894813, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.6689891795277099, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.3314824344065458, - "sentence_nr": 9 + "score": 0.369345079296433, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.47145091131338446, - "sentence_nr": 9 + "score": 0.4718547623527638, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.20972571494011877, - "sentence_nr": 9 + "score": 0.38249626297768063, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.395894071208527, - "sentence_nr": 9 + "score": 0.40976234193505356, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.1477219991186121, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.3092919092976881, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.3387562718376491, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.4478559739568199, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.16678872216161894, - "sentence_nr": 9 + "score": 0.369345079296433, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.38156158663679846, - "sentence_nr": 9 + "score": 0.5103516764863386, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.41558132327975467, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.4704204244154549, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.2423441824135159, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.4429509373913047, + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.26279137685269766, - "sentence_nr": 9 + "score": 0.4646232199104102, + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.4760220740362435, - "sentence_nr": 9 + "score": 0.5678926447384061, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.21940429389247643, - "sentence_nr": 9 + "score": 0.6064630666233242, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.4343280866601455, - "sentence_nr": 9 + "score": 0.6752055521830945, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 9 + "score": 0.5357110024227318, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 + "score": 0.6365941772753647, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.3300025916068812, - "sentence_nr": 9 + "score": 0.6960917409740967, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.5052501972629104, - "sentence_nr": 9 + "score": 0.8209757784637755, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.7329410355605002, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.8468261925085733, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.15371371932217712, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.3322937199755749, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.1824401863423467, - "sentence_nr": 9 + "score": 0.14790264259417688, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.36709433185688595, - "sentence_nr": 9 + "score": 0.27159767590045303, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.3377854698776805, - "sentence_nr": 9 + "score": 0.4751132438608344, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.521201229892482, - "sentence_nr": 9 + "score": 0.6849386986272349, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.15122189206102096, - "sentence_nr": 9 + "score": 0.25552199116069907, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.26750110507308866, - "sentence_nr": 9 + "score": 0.3799133205289109, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.10759927692349745, - "sentence_nr": 9 + "score": 0.23386786214190372, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.21065794536310511, - "sentence_nr": 9 + "score": 0.3682311523733465, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.07843772989359644, - "sentence_nr": 9 + "score": 0.11739521786077453, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.1324578891826276, - "sentence_nr": 9 + "score": 0.22090491782919655, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.1892240568795935, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.08163977068875294, - "sentence_nr": 9 + "score": 0.280413108453108, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.09960206740894453, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.21445174594619118, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.17431417316164047, - "sentence_nr": 9 + "score": 0.13857814312261593, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.2946006716848339, - "sentence_nr": 9 + "score": 0.26136391784313634, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.12475846123062707, - "sentence_nr": 9 + "score": 0.08635800047213174, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.27823340731817514, - "sentence_nr": 9 + "score": 0.218109371254876, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.19794179570942658, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.19794179570942658, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.14134641571854575, - "sentence_nr": 9 + "score": 0.139800134566647, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.22948919855739472, - "sentence_nr": 9 + "score": 0.2510112235832054, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.34791594751284466, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.36381439222876993, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.20233074088759792, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.3746629492952356, + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.17600429416656618, - "sentence_nr": 9 + "score": 0.1767874865365185, + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.2712560798864272, - "sentence_nr": 9 + "score": 0.31807700660641347, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.1475503033983142, - "sentence_nr": 9 + "score": 0.37284875432797243, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.22104108935973044, - "sentence_nr": 9 + "score": 0.44888401040760956, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.16434349396840395, - "sentence_nr": 9 + "score": 0.0925329498915617, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.28582614857210975, - "sentence_nr": 9 + "score": 0.2110486160692096, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.24911274612875411, - "sentence_nr": 9 + "score": 0.2887308472548599, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.3603818786794888, - "sentence_nr": 9 + "score": 0.41654484827391225, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.2971085373234417, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.41681011390626077, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.1907589726146516, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.10085167559661873, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.23831215045289575, - "sentence_nr": 9 + "score": 0.12453389344594705, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.17543744527808774, - "sentence_nr": 9 + "score": 0.141543757252386, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.28201016956553354, - "sentence_nr": 9 + "score": 0.2594145364221844, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.21685485833927476, - "sentence_nr": 9 + "score": 0.4093301993048525, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.3714219747170047, - "sentence_nr": 9 + "score": 0.512762518189388, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.12274092982883021, - "sentence_nr": 9 + "score": 0.6244631487487835, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.3385513651938691, - "sentence_nr": 9 + "score": 0.6931369519059803, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.1463197333291977, - "sentence_nr": 9 + "score": 0.581972638479957, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.366137273378509, - "sentence_nr": 9 + "score": 0.6970914528585833, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.12656494026948834, - "sentence_nr": 9 + "score": 0.44120063733294235, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.3156355830822428, - "sentence_nr": 9 + "score": 0.5296624608564717, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.15110567441923345, - "sentence_nr": 9 + "score": 0.5411953360894813, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.3452827306773606, - "sentence_nr": 9 + "score": 0.6689891795277099, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.30327872414714485, - "sentence_nr": 9 + "score": 0.4272870063962341, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.49804213541579834, - "sentence_nr": 9 + "score": 0.5425603129070803, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.13237645860785527, - "sentence_nr": 9 + "score": 0.5642761727828352, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.3818322535970043, - "sentence_nr": 9 + "score": 0.6181373706707737, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.16091123830242154, - "sentence_nr": 9 + "score": 0.6458552885189878, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.3479090205359599, - "sentence_nr": 9 + "score": 0.7468283944111381, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.18154235663145316, - "sentence_nr": 9 + "score": 0.6458552885189878, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.3906877817743504, - "sentence_nr": 9 + "score": 0.7468283944111381, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.12487405142186064, - "sentence_nr": 9 + "score": 0.4440750605884706, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.32817291858267583, - "sentence_nr": 9 + "score": 0.5402588602256685, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-saba", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.16701570871784516, - "sentence_nr": 9 + "score": 0.5354063183898494, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-saba", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.4021286881032558, - "sentence_nr": 9 + "score": 0.5965096072299958, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "mistralai/mistral-nemo", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.44120063733294235, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "mistralai/mistral-nemo", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.29383139922210444, - "sentence_nr": 9 + "score": 0.5905813392824112, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.15799783604363904, - "sentence_nr": 9 + "score": 0.6242817472465665, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.3949243937510492, - "sentence_nr": 9 + "score": 0.6834390596430621, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.112289032173749, - "sentence_nr": 9 + "score": 0.7329410355605002, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.17726100052085036, - "sentence_nr": 9 + "score": 0.8247196817311783, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.6458552885189878, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.31017716089889963, - "sentence_nr": 9 + "score": 0.7468283944111381, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.14276716121505195, - "sentence_nr": 9 + "score": 0.43310177167002284, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.3191375424862687, - "sentence_nr": 9 + "score": 0.534533410927948, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.4440750605884706, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5402588602256685, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.5411953360894813, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.6208023495269347, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.26481979271706185, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.3212854967972961, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.47171327621770304, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.4577275269488853, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6747054474171109, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.25383339228798274, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.45896379476820603, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.3508739523842563, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.5533976153694653, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.23705266435224473, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.022303919896869945, - "sentence_nr": 9 + "score": 0.44716007458096513, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.33150414660895594, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.5307991156599932, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.12503614625842938, - "sentence_nr": 9 + "score": 0.27046570133003095, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.3600940511104839, - "sentence_nr": 9 + "score": 0.3736509638751225, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.12787395553510186, - "sentence_nr": 9 + "score": 0.2615858282579583, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.33752742535974617, - "sentence_nr": 9 + "score": 0.35447530946908884, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.17878540236558915, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.36568234925753484, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.17878540236558915, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.36568234925753484, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.13181313433495553, - "sentence_nr": 9 + "score": 0.25530635525095574, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.31758120882708796, - "sentence_nr": 9 + "score": 0.4224404198283467, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.34674958774339726, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.4348853298961402, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.2964215118800292, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.4213813461128635, + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.30147856626075187, + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.31669205297743036, - "sentence_nr": 9 + "score": 0.4883780556286986, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.5805399561362194, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.33753843688529356, - "sentence_nr": 9 + "score": 0.6599074620651243, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 9 + "score": 0.44392090655418587, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 + "score": 0.5678926447384061, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.1463197333291977, - "sentence_nr": 9 + "score": 0.40891568776497583, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.3593717322097392, - "sentence_nr": 9 + "score": 0.46522329223142805, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.7329410355605002, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.8468261925085733, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.14793378747473623, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.28942235562470353, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.11436433361427001, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.010176705289341573, - "sentence_nr": 9 + "score": 0.23221971735799607, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.13628770358024436, - "sentence_nr": 9 + "score": 0.18580985894574314, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.3124983184732695, - "sentence_nr": 9 + "score": 0.3347249292100999, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.27668736912821895, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.3710595252626966, - "sentence_nr": 9 + "score": 0.4414406760568898, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.21688283061839067, - "sentence_nr": 9 + "score": 0.17200767571780612, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.41775824162589076, - "sentence_nr": 9 + "score": 0.3723150838362789, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.18235247300784824, - "sentence_nr": 9 + "score": 0.2465659486053858, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.40779523977234755, - "sentence_nr": 9 + "score": 0.5689069160047179, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -37249,127 +43294,239 @@ "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.013915288440632284, - "sentence_nr": 9 + "score": 0.30391153783979835, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.15481575551900897, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.3242496687697624, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.27011759273645686, - "sentence_nr": 9 + "score": 0.12887696534828325, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.4427599081001661, - "sentence_nr": 9 + "score": 0.30114368429557287, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.15138514598766048, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.39962545473912425, - "sentence_nr": 9 + "score": 0.3237497764315872, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.18012107375362474, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.3031860269594791, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.18693000799960027, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.29214034034017544, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.1544458227548897, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.39112369376374106, - "sentence_nr": 9 + "score": 0.3343587266874694, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.29160060199573634, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.611038619206726, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.17080052973495516, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.41295134318620164, + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.25299682930744943, - "sentence_nr": 9 + "score": 0.43310177167002284, + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.4798320133488269, - "sentence_nr": 9 + "score": 0.6107152353968289, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.2202248274013358, - "sentence_nr": 9 + "score": 0.16369773101364, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.48474965676300186, - "sentence_nr": 9 + "score": 0.3525432121056259, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 9 + "score": 0.23817261442630488, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 + "score": 0.448286611717823, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.20679845323803403, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.45813938111627356, - "sentence_nr": 9 + "score": 0.47636494608150104, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.196046355324564, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.4861187554848482, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.14380553624999498, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.2597661791838868, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -37377,207 +43534,319 @@ "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.1593344703029041, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.22494952618128455, - "sentence_nr": 9 + "score": 0.09147827112247602, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.4760660341798742, - "sentence_nr": 9 + "score": 0.3258762519783793, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.12666372160329223, - "sentence_nr": 9 + "score": 0.4751132438608344, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.2650373529479294, - "sentence_nr": 9 + "score": 0.6159319815107203, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.12162779391619735, - "sentence_nr": 9 + "score": 0.1477219991186121, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.3228288840559658, - "sentence_nr": 9 + "score": 0.28685201698226354, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.1649662542496744, - "sentence_nr": 9 + "score": 0.1477219991186121, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.3466546857451185, - "sentence_nr": 9 + "score": 0.2391308148553106, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.18180608220159192, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.01536966738773372, - "sentence_nr": 9 + "score": 0.27307753334479423, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.14965975078050625, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.22213502776474325, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.14965975078050625, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.2808927299109351, - "sentence_nr": 9 + "score": 0.22213502776474325, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.14326513489612383, - "sentence_nr": 9 + "score": 0.15604242268653643, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.4034278533385552, - "sentence_nr": 9 + "score": 0.2255928425212252, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.14068535649874328, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.22369939407063077, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.14643937864373885, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.22808564446823346, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.13829446068705525, - "sentence_nr": 9 + "score": 0.14965975078050625, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.32059338352121075, - "sentence_nr": 9 + "score": 0.22213502776474325, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.14965975078050625, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.22213502776474325, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.24099646203241393, + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.24951692246125404, - "sentence_nr": 9 + "score": 0.40842567407749947, + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.5427942390000431, - "sentence_nr": 9 + "score": 0.49897421349092935, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.16521691795932783, - "sentence_nr": 9 + "score": 0.18180608220159192, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.4134512022176617, - "sentence_nr": 9 + "score": 0.2765353481682209, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.203264842568494, - "sentence_nr": 9 + "score": 0.3160946016179871, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.3435867188688158, - "sentence_nr": 9 + "score": 0.407876439044591, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.12366644075037489, - "sentence_nr": 9 + "score": 0.17466240109087192, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.37651342775995167, - "sentence_nr": 9 + "score": 0.2719194508460068, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.15980518115118317, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.2292736049463283, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -37585,703 +43854,1039 @@ "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.2962222000049211, - "sentence_nr": 9 + "score": 0.12316365460790003, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.1971903602140518, - "sentence_nr": 9 + "score": 0.2615858282579583, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.36269646528997446, - "sentence_nr": 9 + "score": 0.35862918415512257, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.203264842568494, - "sentence_nr": 9 + "score": 0.7281051247089317, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.2922087191170089, - "sentence_nr": 9 + "score": 0.78479833664205, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.18237599479708327, - "sentence_nr": 9 + "score": 0.3254455687469726, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.3740403511567824, - "sentence_nr": 9 + "score": 0.4474512036484817, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.6230832293767097, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.2244748716483542, - "sentence_nr": 9 + "score": 0.702540870003671, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.3267294026204632, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.007281906895508523, - "sentence_nr": 9 + "score": 0.4510525482602028, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.4855332614117322, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.6448214024803467, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.26632240818317526, - "sentence_nr": 9 + "score": 0.21690365808279138, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.467076790922237, - "sentence_nr": 9 + "score": 0.33065826652002533, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.18294117097472648, - "sentence_nr": 9 + "score": 0.24728515687112834, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.4383387744769579, - "sentence_nr": 9 + "score": 0.3088155734423375, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.21333164424828907, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.33911412792045653, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.11391856953132565, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.2717713021928714, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.17092467746295725, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.4340281226634826, - "sentence_nr": 9 + "score": 0.27718461611705486, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.4578226095312774, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5187171262420485, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.19895913918781652, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.38716933464075676, + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.6230832293767097, + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.3857801012055859, - "sentence_nr": 9 + "score": 0.702540870003671, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.22860414459682069, - "sentence_nr": 9 + "score": 0.6052987576779449, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.47331131010100724, - "sentence_nr": 9 + "score": 0.6657086815551626, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 9 + "score": 0.5365920629514802, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 + "score": 0.6274039030337838, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.17200673466668953, - "sentence_nr": 9 + "score": 0.4578226095312774, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.39948318545775324, - "sentence_nr": 9 + "score": 0.5406295999835291, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.6676191940689508, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.8138710990447064, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.18029268617744973, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.10553225565626573, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.1763116500850642, - "sentence_nr": 9 + "score": 0.002054231717337716, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.17730543118229922, - "sentence_nr": 9 + "score": 0.12286996020967837, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.4201842844735916, - "sentence_nr": 9 + "score": 0.31567668741706395, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.1740044679403827, - "sentence_nr": 9 + "score": 0.4815092081725061, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.36375152376157177, - "sentence_nr": 9 + "score": 0.5820265218174012, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.12876689524369925, - "sentence_nr": 9 + "score": 0.23887527917609022, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.3253153379449275, - "sentence_nr": 9 + "score": 0.4120359948636439, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.13269353024089545, - "sentence_nr": 9 + "score": 0.36210097004176117, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.28998089836851504, - "sentence_nr": 9 + "score": 0.408098151133905, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.3165014630070639, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.01357525601063516, - "sentence_nr": 9 + "score": 0.42516173623967946, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.25387990321843446, - "sentence_nr": 9 + "score": 0.36923778753333203, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.4061799423946215, - "sentence_nr": 9 + "score": 0.5093945382661453, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.20298407172594946, - "sentence_nr": 9 + "score": 0.3685289119518548, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.427376330935813, - "sentence_nr": 9 + "score": 0.47062358557598893, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.353203510510529, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.382987159925022, - "sentence_nr": 9 + "score": 0.4910213297498164, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.2042128370387497, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.36557640203818875, - "sentence_nr": 9 + "score": 0.3132427423570805, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.17558199612672082, - "sentence_nr": 9 + "score": 0.2001670147593021, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.41334979014850587, - "sentence_nr": 9 + "score": 0.3176647235897937, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.20947801521367798, - "sentence_nr": 9 + "score": 0.15820362165931962, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.37699245483283905, - "sentence_nr": 9 + "score": 0.2249046365436241, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-saba", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.24318848592140954, - "sentence_nr": 9 + "score": 0.369345079296433, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-saba", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.501343318078065, - "sentence_nr": 9 + "score": 0.4918148373059308, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "mistralai/mistral-nemo", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.46181382506716123, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "mistralai/mistral-nemo", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 + "score": 0.5392962545975036, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.13784906211485343, - "sentence_nr": 9 + "score": 0.4185938787651429, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.3161105981607342, - "sentence_nr": 9 + "score": 0.6016367461945803, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.322788951728102, - "sentence_nr": 9 + "score": 0.4192761856401719, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.40263021320001785, - "sentence_nr": 9 + "score": 0.6279656551060881, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.119159749312327, - "sentence_nr": 9 + "score": 0.4753167451887016, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.21297942664093145, - "sentence_nr": 9 + "score": 0.6372909532389948, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.1405026510197826, - "sentence_nr": 9 + "score": 0.38317923930200504, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.24785258181936404, - "sentence_nr": 9 + "score": 0.47975624978837655, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.5426924329239604, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.022303919896869945, - "sentence_nr": 9 + "score": 0.578377079605085, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.16341242314728613, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.30086466715081783, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.2054194471318506, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.25678404806291744, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.37045149029437513, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.3882967156327901, - "sentence_nr": 9 + "score": 0.580451128369423, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.7246473808162345, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.580451128369423, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.728208634600343, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.5793367580502561, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.6502428441722727, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.1243018504102695, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.32950116238735283, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.5198707241967666, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.6498183531322053, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.20664181816537014, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.4287492504761661, - "sentence_nr": 9 + "score": 0.371359687688326, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.15626231814206226, - "sentence_nr": 9 + "score": 0.23660362391696813, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.2918712789926548, - "sentence_nr": 9 + "score": 0.34152697838249696, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.1477219991186121, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.3092919092976881, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.1477219991186121, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.3092919092976881, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.15325316503089068, - "sentence_nr": 9 + "score": 0.369345079296433, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.2756316951639811, - "sentence_nr": 9 + "score": 0.5103516764863386, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.4587032440161705, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5232464635424798, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.23603721040376854, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.4074433070802772, + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.6230832293767097, + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.2778060655126336, - "sentence_nr": 9 + "score": 0.702540870003671, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.6052987576779449, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.6657086815551626, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.5357110024227318, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.3210853623565359, - "sentence_nr": 9 + "score": 0.6365941772753647, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.009559007108143848, - "sentence_nr": 9 + "score": 0.36763082847636347, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.05937666456658802, - "sentence_nr": 9 + "score": 0.45637140510576385, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.6960917409740967, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.8209757784637755, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 9 + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "microsoft/phi-4", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.28306950244125495, - "sentence_nr": 9 + "score": 0.2651756541673285, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.14063630555225284, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.24531520458611372, - "sentence_nr": 9 + "score": 0.16935976352352106, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.3264287329357334, - "sentence_nr": 9 + "score": 0.31268514922728713, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.41662443172249786, - "sentence_nr": 9 + "score": 0.41990725085948355, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.16925466459550803, - "sentence_nr": 9 + "score": 0.8482942955247808, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.35912398848424326, - "sentence_nr": 9 + "score": 0.9256238040654331, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.2036348471340078, - "sentence_nr": 9 + "score": 1.0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.3472831655579266, - "sentence_nr": 9 + "score": 1.0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 1.0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.21547697432588886, - "sentence_nr": 9 + "score": 1.0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -38289,335 +44894,559 @@ "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.18039960295364865, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.8482942955247808, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.9256238040654331, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.13602652550459576, - "sentence_nr": 9 + "score": 1.0, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.3002035243994678, - "sentence_nr": 9 + "score": 1.0, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.21397099133614067, - "sentence_nr": 9 + "score": 1.0, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.3568171392601981, - "sentence_nr": 9 + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.4388504279172877, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.808070563320424, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.4841156774078945, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.8256726071164937, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.14134641571854575, - "sentence_nr": 9 + "score": 1.0, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.3078571099929154, - "sentence_nr": 9 + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.18061023425907288, - "sentence_nr": 9 + "score": 1.0, + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.3684591961152223, - "sentence_nr": 9 + "score": 1.0, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.15658994837053716, - "sentence_nr": 9 + "score": 0.8482942955247808, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.3084004707364603, - "sentence_nr": 9 + "score": 0.9256238040654331, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.20215771603666896, - "sentence_nr": 9 + "score": 0.8363600587440573, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.34483322672745376, - "sentence_nr": 9 + "score": 0.9912737182609732, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.16165057948216605, - "sentence_nr": 9 + "score": 1.0, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.35172210628524053, - "sentence_nr": 9 + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.8895260356363631, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.9215559912711291, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.6018154975998465, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.012458960343878354, - "sentence_nr": 9 + "score": 0.7669980679050217, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.20053583653512705, - "sentence_nr": 9 + "score": 0.8482942955247808, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.3585550644386862, - "sentence_nr": 9 + "score": 0.9256238040654331, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.2327080490816513, - "sentence_nr": 9 + "score": 0.47320724783393625, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.4213315211213489, - "sentence_nr": 9 + "score": 0.5833006006517599, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.3556521383601747, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.24362353508932386, - "sentence_nr": 9 + "score": 0.594830811413066, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.14005830765988142, - "sentence_nr": 9 + "score": 0.5406964703993759, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.28271314565258726, - "sentence_nr": 9 + "score": 0.5964595329953364, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.2575863752355164, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.01943377856541192, - "sentence_nr": 9 + "score": 0.3717184743596148, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.13585813340607317, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.3021348975068449, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.24250789663911215, - "sentence_nr": 9 + "score": 0.22481074167380632, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.44127658727390434, - "sentence_nr": 9 + "score": 0.3761108267186685, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.2298971389591186, - "sentence_nr": 9 + "score": 0.34589895849033114, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.45764667682340326, - "sentence_nr": 9 + "score": 0.44792042673107413, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.14397786194708656, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.3331521962085306, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.14587521254752497, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.3305597683917539, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.22183437291807073, - "sentence_nr": 9 + "score": 0.08197539732074254, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.4690517750319636, - "sentence_nr": 9 + "score": 0.2552663483401067, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.33018775735516415, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.5054531554717974, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.15146316523107298, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.4192400624404484, + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.1743771229292808, - "sentence_nr": 9 + "score": 0.3032929624979452, + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.46893502773403367, - "sentence_nr": 9 + "score": 0.5077415447889289, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.15089318423122547, - "sentence_nr": 9 + "score": 0.43994654743790196, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.4354703980715437, - "sentence_nr": 9 + "score": 0.5758276578902723, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 9 + "score": 0.3730786950813075, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 + "score": 0.47401660085208147, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.3931382365355541, - "sentence_nr": 9 + "score": 0.38223593598574, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.5429286385993002, - "sentence_nr": 9 + "score": 0.5729676575997464, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.38223593598574, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.5686739421910859, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.21711852081087685, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.34076978472998576, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -38625,95 +45454,111 @@ "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.007237155276460672, - "sentence_nr": 9 + "score": 0.19328966457045355, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ru", "task": "translation_from", "metric": "bleu", - "score": 0.22233922818300378, - "sentence_nr": 9 + "score": 0.20477156411200437, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ru", "task": "translation_from", "metric": "chrf", - "score": 0.46750271079023087, - "sentence_nr": 9 + "score": 0.3371728179865314, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.08939270118279458, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.2568191876426829, - "sentence_nr": 9 + "score": 0.2952752522340665, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.21629114799587432, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.28135849152758385, - "sentence_nr": 9 + "score": 0.3542320138389837, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.13585608692428647, - "sentence_nr": 9 + "score": 0.21993356630819796, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.24602093467402117, - "sentence_nr": 9 + "score": 0.3822901360655399, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.12212865548711085, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.014749122939855126, - "sentence_nr": 9 + "score": 0.27604929504751197, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.15804606946648236, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.24864035916056065, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", @@ -38721,111 +45566,207 @@ "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.28355113133330917, - "sentence_nr": 9 + "score": 0.007934677500708292, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.19984607356962125, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.2769725060346048, - "sentence_nr": 9 + "score": 0.29326031481052006, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 9 + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.2674274417945491, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.29066274199907366, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.08939270118279458, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.22585782564798598, - "sentence_nr": 9 + "score": 0.2952752522340665, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.1307118752097732, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.2556075727355655, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.19545984328607466, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.3260504123048448, + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.17729842264695017, + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.2753348107620908, - "sentence_nr": 9 + "score": 0.3337514618651578, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.16341242314728613, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.30227683871289934, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.17095864413061523, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.2462954618610128, - "sentence_nr": 9 + "score": 0.2805749649536233, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 9 + "score": 0.13952118378975725, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 + "score": 0.2962794525145751, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.1307118752097732, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.2771420974564079, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 9 + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "microsoft/phi-4", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.25512324153300714, - "sentence_nr": 9 + "score": 0.1907005287801752, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -38833,511 +45774,751 @@ "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.006249447069096045, - "sentence_nr": 9 + "score": 0.028735632183908046, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "sw", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.17670087745185423, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "sw", "task": "translation_from", "metric": "chrf", - "score": 0.17131793456589922, - "sentence_nr": 9 + "score": 0.3136010782144669, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.12832055613623328, - "sentence_nr": 9 + "score": 0.580451128369423, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.19433944404681203, - "sentence_nr": 9 + "score": 0.7246473808162345, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.14482189302397735, - "sentence_nr": 9 + "score": 0.580451128369423, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.2913876815877049, - "sentence_nr": 9 + "score": 0.728208634600343, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.1217802106941195, - "sentence_nr": 9 + "score": 0.5793367580502561, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.19314598726036322, - "sentence_nr": 9 + "score": 0.6502428441722727, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.17662903260733673, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.022303919896869945, - "sentence_nr": 9 + "score": 0.359573626731952, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.18723860296151318, - "sentence_nr": 9 + "score": 0.30826423742301434, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.3744699636940152, - "sentence_nr": 9 + "score": 0.4128975675984797, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.13502367316243039, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.3238973846683935, - "sentence_nr": 9 + "score": 0.299859745321103, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.1361658548186748, - "sentence_nr": 9 + "score": 0.27274191069381915, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.3295167855876769, - "sentence_nr": 9 + "score": 0.37436438971100644, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.25753379048739855, - "sentence_nr": 9 + "score": 0.1333497993210919, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.42531177875262893, - "sentence_nr": 9 + "score": 0.2559597722970139, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.12819825042984195, - "sentence_nr": 9 + "score": 0.1333497993210919, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.36515328991507745, - "sentence_nr": 9 + "score": 0.2559597722970139, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 9 + "score": 0.526589137558171, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 + "score": 0.5667866238125795, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-saba", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.21743769222637532, - "sentence_nr": 9 + "score": 0.17981949418962662, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-saba", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.4131100936190792, - "sentence_nr": 9 + "score": 0.3488441223298029, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "mistralai/mistral-nemo", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.11914562165195522, - "sentence_nr": 9 + "score": 0.17857033414091059, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "mistralai/mistral-nemo", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.36399317085467314, - "sentence_nr": 9 + "score": 0.32632435359302164, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.1302352098354987, - "sentence_nr": 9 + "score": 0.6230832293767097, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.326035134708999, - "sentence_nr": 9 + "score": 0.702540870003671, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.14410670132605607, - "sentence_nr": 9 + "score": 0.5805399561362194, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.24025207593480963, - "sentence_nr": 9 + "score": 0.6599074620651243, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "google/gemma-3-27b-it", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.6244631487487835, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "google/gemma-3-27b-it", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.16306957103469613, - "sentence_nr": 9 + "score": 0.7155411017347171, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.14219389639501667, - "sentence_nr": 9 + "score": 0.6960917409740967, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.3550945020345845, - "sentence_nr": 9 + "score": 0.8209757784637755, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", + "model": "deepseek/deepseek-chat", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.43070794242402144, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", + "model": "deepseek/deepseek-chat", + "bcp_47": "id", "task": "translation_from", "metric": "chrf", - "score": 0.013501937941345124, - "sentence_nr": 9 + "score": 0.5011615373298621, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "de", + "model": "microsoft/phi-4", + "bcp_47": "id", "task": "translation_from", "metric": "bleu", - "score": 0.12832055613623328, - "sentence_nr": 9 + "score": 0.19665864691817672, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.36913377112413776, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.019516573752972968, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.22419056820298167, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.3577306040313533, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.583526016818016, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.6994652193905146, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.27405612859390877, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.4639958592456083, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.4390960897971484, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.541742178821102, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.13232291594986312, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.301901669683193, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.268250341087026, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.37630621020765986, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.3166144686275811, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.239266757938952, - "sentence_nr": 9 + "score": 0.4171828599209745, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.2434330428491034, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.37693028676849333, - "sentence_nr": 9 + "score": 0.31858900384957733, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.25430316746203985, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.4159778448357134, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.25381494737245897, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.4177779282578606, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.15084825228964133, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.3791177761741048, - "sentence_nr": 9 + "score": 0.3284886849880412, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.1508920852219557, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.32281122236013104, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.28855357268630083, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.44235337548914555, + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.6230832293767097, + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.3767372261720185, - "sentence_nr": 9 + "score": 0.702540870003671, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.3160946016179871, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.3284649068899757, - "sentence_nr": 9 + "score": 0.4791760084407935, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.20988785322505515, - "sentence_nr": 9 + "score": 0.4452652851854937, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.4323069807401831, - "sentence_nr": 9 + "score": 0.5889782977654896, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.2677353447271197, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.3397876134677058, - "sentence_nr": 9 + "score": 0.3932141708916282, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.27326653732219497, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.4001542443022677, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.23359433863044574, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.36161782617294214, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.1536690667279411, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.009628007582726738, - "sentence_nr": 9 + "score": 0.23373462830676886, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "de", "task": "translation_from", "metric": "bleu", - "score": 0.20247469739337648, - "sentence_nr": 9 + "score": 0.16800102974369996, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "de", "task": "translation_from", "metric": "chrf", - "score": 0.4418847146430419, - "sentence_nr": 9 + "score": 0.3319781987745275, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 9 + "score": 0.6052987576779449, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.9199349282509897, - "sentence_nr": 9 + "score": 0.643602170728296, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 9 + "score": 0.13004800471424346, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.9199349282509897, - "sentence_nr": 9 + "score": 0.28217142159025543, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.8827916928185874, - "sentence_nr": 9 + "score": 0.3924259174695316, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.9278293769424701, - "sentence_nr": 9 + "score": 0.45050557152077386, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.5919743410620021, - "sentence_nr": 9 + "score": 0.10601317434781207, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.8142101616656354, - "sentence_nr": 9 + "score": 0.2344095627038401, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.15450079547146164, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.25100896225203706, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", @@ -39345,15748 +46526,17844 @@ "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 9 + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.2762822897608569, - "sentence_nr": 9 + "score": 0.248781805015534, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.24007528246707907, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.24942094354139677, - "sentence_nr": 9 + "score": 0.31084467045503017, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 9 + "score": 0.12416659058610632, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 + "score": 0.23141028782228498, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo", "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 9 + "score": 0.12416659058610632, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 + "score": 0.23141028782228498, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.6642718379939968, - "sentence_nr": 9 + "score": 0.12769027061800275, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.7768492311706325, - "sentence_nr": 9 + "score": 0.21844360831325868, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-saba", "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 9 + "score": 0.26853783353673283, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-saba", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 + "score": 0.41566337942217346, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-nemo", "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-nemo", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 + "score": 0.16599711114472157, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 9 + "score": 0.4246183605185108, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 + "score": 0.5497460511936695, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ja", "task": "translation_from", "metric": "bleu", - "score": 0.7660237942267061, - "sentence_nr": 9 + "score": 0.22523697594538705, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ja", "task": "translation_from", "metric": "chrf", - "score": 0.8523393041110139, - "sentence_nr": 9 + "score": 0.3911595396314754, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.6358921902612438, - "sentence_nr": 0 + "score": 0.23705266435224473, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.8041899227402122, - "sentence_nr": 0 + "score": 0.3838188339168412, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.6299285159340671, - "sentence_nr": 0 + "score": 0.1852972751417938, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.7993134129243716, - "sentence_nr": 0 + "score": 0.36660412101424933, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.710159574003633, - "sentence_nr": 0 + "score": 0.16341242314728613, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.8462481747979111, - "sentence_nr": 0 + "score": 0.33893747430370086, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.5487830136896633, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.77238965036654, - "sentence_nr": 0 + "score": 0.2469646873408414, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "en", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.48244543610473856, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "en", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.7294391805717774, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.5745954681260859, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.7920051188244848, - "sentence_nr": 0 + "score": 0.2653698485201136, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.6012475603804444, - "sentence_nr": 0 + "score": 0.4815092081725061, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.7990339788905771, - "sentence_nr": 0 + "score": 0.5785251190053333, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.5938611220262585, - "sentence_nr": 0 + "score": 0.37821486365532614, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.8079474861665713, - "sentence_nr": 0 + "score": 0.4718665834023439, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.6660677740125452, - "sentence_nr": 0 + "score": 0.35423985843000033, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.8293798371335214, - "sentence_nr": 0 + "score": 0.4293667924436175, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.6012475603804444, - "sentence_nr": 0 + "score": 0.2933705789311311, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.8102198011293434, - "sentence_nr": 0 + "score": 0.35570110758127277, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.5718247506430171, - "sentence_nr": 0 + "score": 0.41558132327975467, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.7570613392550647, - "sentence_nr": 0 + "score": 0.4683680115570169, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.6241924127610678, - "sentence_nr": 0 + "score": 0.3730786950813075, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.8031006153647919, - "sentence_nr": 0 + "score": 0.4080871920416184, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.6372502110149713, - "sentence_nr": 0 + "score": 0.3674668904964848, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.8187019874664503, - "sentence_nr": 0 + "score": 0.40975628086142124, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.35059076445515835, - "sentence_nr": 0 + "score": 0.3615855225145535, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.40219803477483124, - "sentence_nr": 0 + "score": 0.4104273001940124, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.41316127706749806, - "sentence_nr": 0 + "score": 0.3615855225145535, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.4430321339435623, - "sentence_nr": 0 + "score": 0.4104273001940124, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.3993284843242707, - "sentence_nr": 0 + "score": 0.3931807596037881, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.4224738565076288, - "sentence_nr": 0 + "score": 0.41587358041151196, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.2908087026261561, - "sentence_nr": 0 + "score": 0.41546060026113085, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.3411361400094189, - "sentence_nr": 0 + "score": 0.46431680824298277, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.3504650671187503, - "sentence_nr": 0 + "score": 0.22128776529156546, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.3985617531166433, - "sentence_nr": 0 + "score": 0.3299817815368062, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.3572514590810421, - "sentence_nr": 0 + "score": 0.45167594566243024, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.40312319760122833, - "sentence_nr": 0 + "score": 0.6430302720642738, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.2996868226086902, - "sentence_nr": 0 + "score": 0.4578226095312774, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.3355531727847081, - "sentence_nr": 0 + "score": 0.5429809487027987, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.4317270147628918, - "sentence_nr": 0 + "score": 0.17679588126795498, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.4476891051893246, - "sentence_nr": 0 + "score": 0.4098986063548376, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.4199243020508202, - "sentence_nr": 0 + "score": 0.3675058901988579, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.4310330650643179, - "sentence_nr": 0 + "score": 0.445171638403697, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.3811228251371368, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.45807534399944205, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.38146085172952343, - "sentence_nr": 0 + "score": 0.25567957494892185, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.40030269579783606, - "sentence_nr": 0 + "score": 0.3431135836501165, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.2613520653232399, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.292974388325607, - "sentence_nr": 0 + "score": 0.3196352513221046, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.38876512474558916, - "sentence_nr": 0 + "score": 0.29107087297820256, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.41342876789412997, - "sentence_nr": 0 + "score": 0.39517833279310743, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.8780634320789833, - "sentence_nr": 0 + "score": 0.38694317759010316, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.926946700115022, - "sentence_nr": 0 + "score": 0.45827711860455167, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.7964573357809173, - "sentence_nr": 0 + "score": 0.3699382260470039, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.8458636471716781, - "sentence_nr": 0 + "score": 0.4032851361478274, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 0 + "score": 0.42378190548671596, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 + "score": 0.47335507275218824, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.9452996322890763, - "sentence_nr": 0 + "score": 0.3763278728427448, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.9463396364218181, - "sentence_nr": 0 + "score": 0.39009457811977266, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.409211292187266, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.44521767975773685, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.8562379115188704, - "sentence_nr": 0 + "score": 0.409211292187266, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.914880147320643, - "sentence_nr": 0 + "score": 0.462803416015263, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.9878765474230741, - "sentence_nr": 0 + "score": 0.3763743474188506, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.9958930217841712, - "sentence_nr": 0 + "score": 0.4120099199050514, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.9878765474230741, - "sentence_nr": 0 + "score": 0.3615855225145535, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.9958930217841712, - "sentence_nr": 0 + "score": 0.39989254486815534, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.9878765474230741, - "sentence_nr": 0 + "score": 0.3615855225145535, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.9958930217841712, - "sentence_nr": 0 + "score": 0.39989254486815534, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.6537803976048806, - "sentence_nr": 0 + "score": 0.34791594751284466, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.7742226743967544, - "sentence_nr": 0 + "score": 0.40864368085475805, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.4093301993048525, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.44717887287377617, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.9878765474230741, - "sentence_nr": 0 + "score": 0.21271275367465078, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.9958930217841712, - "sentence_nr": 0 + "score": 0.29643115550453636, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.738238064391125, - "sentence_nr": 0 + "score": 0.45267625566586717, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.8637738769684485, - "sentence_nr": 0 + "score": 0.6356631092098544, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 0 + "score": 0.45167594566243024, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 + "score": 0.5192073147540899, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.40673971192998765, - "sentence_nr": 0 + "score": 0.4056782022243561, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.6897190926100627, - "sentence_nr": 0 + "score": 0.5237454577692897, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.3707525915417785, - "sentence_nr": 0 + "score": 0.409211292187266, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.6481906761834414, - "sentence_nr": 0 + "score": 0.44289452305459603, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.4405434565828979, - "sentence_nr": 0 + "score": 0.4245970617334277, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.6872423435487918, - "sentence_nr": 0 + "score": 0.4579580788935747, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.34070519401434163, - "sentence_nr": 0 + "score": 0.27334594211112967, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.6376396416993303, - "sentence_nr": 0 + "score": 0.34926643308225325, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "es", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.38318568210251663, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "es", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.6625830408830621, - "sentence_nr": 0 + "score": 0.139781837804502, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.35601247064914876, - "sentence_nr": 0 + "score": 0.2134385691462796, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.6528728847159075, - "sentence_nr": 0 + "score": 0.296993231533869, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.3535276144718208, - "sentence_nr": 0 + "score": 0.4115167991342047, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.6399338911163, - "sentence_nr": 0 + "score": 0.5649900101054287, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.42021658469726225, - "sentence_nr": 0 + "score": 0.45167594566243024, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.7002995337928327, - "sentence_nr": 0 + "score": 0.5169677927619225, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.29793763405666984, - "sentence_nr": 0 + "score": 0.2706805630983137, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.5878658443031616, - "sentence_nr": 0 + "score": 0.38186806613291924, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.1890425467840326, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.3513019690066663, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.2737856702715042, - "sentence_nr": 0 + "score": 0.22455129433835885, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.6091441790112126, - "sentence_nr": 0 + "score": 0.3153073204562459, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.2060740184460064, - "sentence_nr": 0 + "score": 0.20690996611611379, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.5566122985381202, - "sentence_nr": 0 + "score": 0.30036193380302606, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.3764145740138264, - "sentence_nr": 0 + "score": 0.23270804908165135, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.660406350984819, - "sentence_nr": 0 + "score": 0.3478589640284733, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.703373719677874, - "sentence_nr": 0 + "score": 0.1925775824064372, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.7784050705257474, - "sentence_nr": 0 + "score": 0.3523071217485035, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.469958733898233, - "sentence_nr": 0 + "score": 0.1925775824064372, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.5843756060033074, - "sentence_nr": 0 + "score": 0.3523071217485035, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.6034601376302852, - "sentence_nr": 0 + "score": 0.11234905986715489, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.7074074363255227, - "sentence_nr": 0 + "score": 0.15225251521949978, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.5200692650497809, - "sentence_nr": 0 + "score": 0.22534921437308478, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.6586847274336591, - "sentence_nr": 0 + "score": 0.30558055644143256, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.668895061203786, - "sentence_nr": 0 + "score": 0.13461801293778908, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.7856645013611931, - "sentence_nr": 0 + "score": 0.32028794303934305, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.5724622291345857, - "sentence_nr": 0 + "score": 0.6230832293767097, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.6818279156433621, - "sentence_nr": 0 + "score": 0.702540870003671, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.5439803529976158, - "sentence_nr": 0 + "score": 0.38694317759010316, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.657598922173703, - "sentence_nr": 0 + "score": 0.4758624095139857, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.731132155274915, - "sentence_nr": 0 + "score": 0.13733894353973466, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.8118306465406135, - "sentence_nr": 0 + "score": 0.26356016327430454, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.5447800851151646, - "sentence_nr": 0 + "score": 0.15878174295086994, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.6845859707632784, - "sentence_nr": 0 + "score": 0.3163237075880393, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.5073374020380702, - "sentence_nr": 0 + "score": 0.22095731396358687, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.6713451965832894, - "sentence_nr": 0 + "score": 0.33443277609714755, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.63457045351243, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.760139991277541, - "sentence_nr": 0 + "score": 0.2113573874732524, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.2747017431249852, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.4456826256200505, - "sentence_nr": 0 + "score": 0.014379215591354156, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.4834220366915352, - "sentence_nr": 0 + "score": 0.12943648490176665, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.678862671476654, - "sentence_nr": 0 + "score": 0.29942831535046555, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.3489926819498492, - "sentence_nr": 0 + "score": 0.24489516889906388, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.5715668842319502, - "sentence_nr": 0 + "score": 0.409369762090413, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.2786169604662155, - "sentence_nr": 0 + "score": 0.23386786214190372, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.5267252236203236, - "sentence_nr": 0 + "score": 0.3780009826926042, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.35446322216812387, - "sentence_nr": 0 + "score": 0.26696378876165927, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.5208748527454148, - "sentence_nr": 0 + "score": 0.3581548569027847, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.2679728611808951, - "sentence_nr": 0 + "score": 0.1262744724314408, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.466691372759197, - "sentence_nr": 0 + "score": 0.25266688865379994, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.3595597536132021, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5042608116537557, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.3842600770501223, - "sentence_nr": 0 + "score": 0.1961887304255141, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.6030559477915464, - "sentence_nr": 0 + "score": 0.31368406470821447, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.2676232320051144, - "sentence_nr": 0 + "score": 0.22453002699007485, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.5440246804235981, - "sentence_nr": 0 + "score": 0.3354597455808525, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.1491744505572466, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.23672011749048205, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.1491744505572466, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.23672011749048205, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.2328598163544389, - "sentence_nr": 0 + "score": 0.12061450720815534, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.46604753989124215, - "sentence_nr": 0 + "score": 0.28227862122593256, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.2448987533565238, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.21936644511443132, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3266422289425898, + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.36488083606907545, - "sentence_nr": 0 + "score": 0.6230832293767097, + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.5811650865491297, - "sentence_nr": 0 + "score": 0.702540870003671, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.3249989390135794, - "sentence_nr": 0 + "score": 0.3011172971082868, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.5532261012182782, - "sentence_nr": 0 + "score": 0.3734627919313833, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.04043358226234485, - "sentence_nr": 0 + "score": 0.26518122980477765, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.178130317890244, - "sentence_nr": 0 + "score": 0.514846875413034, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.31400830186120793, - "sentence_nr": 0 + "score": 0.2082376263771737, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.5436299115609682, - "sentence_nr": 0 + "score": 0.3665582908776792, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.3159148237572417, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5258141913574198, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.0034435261707988977, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.180038135256147, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.42760668286140896, - "sentence_nr": 0 + "score": 0.010917030567685585, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.2702404890575711, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.505948742808373, - "sentence_nr": 0 + "score": 0.22316698150381944, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.3410244689880313, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.5816669416914216, - "sentence_nr": 0 + "score": 0.3989952325675248, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.4234343012313773, - "sentence_nr": 0 + "score": 0.3925121365052661, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.6625289905598352, - "sentence_nr": 0 + "score": 0.47788592802001717, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.3885765192359091, - "sentence_nr": 0 + "score": 0.35423985843000033, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.6554470157301392, - "sentence_nr": 0 + "score": 0.4401068255722377, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.38108864298853723, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.6485553379227472, - "sentence_nr": 0 + "score": 0.19263684669277223, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.16368118043487417, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.3476956163805434, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.385626093679484, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.6500036127408896, - "sentence_nr": 0 + "score": 0.2187746652191371, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.3765213224289163, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.6469521424555786, - "sentence_nr": 0 + "score": 0.18341524527739528, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.36247466608675993, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.6011484151165629, - "sentence_nr": 0 + "score": 0.14567343886386178, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.4326013853051836, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.6609703936604562, - "sentence_nr": 0 + "score": 0.15703032248735793, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.4079926989572759, - "sentence_nr": 0 + "score": 0.3020103087706165, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.6592699047005666, - "sentence_nr": 0 + "score": 0.36625663694634303, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.30494536158123264, - "sentence_nr": 0 + "score": 0.3185739648783928, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.5857538582551342, - "sentence_nr": 0 + "score": 0.42685007822661736, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.26075652499067425, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.5605305670545515, - "sentence_nr": 0 + "score": 0.26234693671182024, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.13339786348528015, - "sentence_nr": 0 + "score": 0.4772894233335957, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.46778689835182324, - "sentence_nr": 0 + "score": 0.6272300080155604, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.2602768294269028, - "sentence_nr": 0 + "score": 0.2852650068463884, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.5310567541651178, - "sentence_nr": 0 + "score": 0.472092904441211, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.4324680011853555, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.5877600878871951, - "sentence_nr": 0 + "score": 0.2523841428380167, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.4493940083619696, - "sentence_nr": 0 + "score": 0.11146727460890443, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.6230960824462234, - "sentence_nr": 0 + "score": 0.23969027175152666, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.4116575552858724, - "sentence_nr": 0 + "score": 0.14100024578768863, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.5665759692366567, - "sentence_nr": 0 + "score": 0.33713039237182474, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.3533147318401534, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.5972951640947346, - "sentence_nr": 0 + "score": 0.1820475071426938, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.37929353357736867, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.6046831629978785, - "sentence_nr": 0 + "score": 0.1123102665458642, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.44219732271776674, - "sentence_nr": 0 + "score": 0.17729842264695017, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.6193429426274062, - "sentence_nr": 0 + "score": 0.32305168353427943, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.41852674506584964, - "sentence_nr": 0 + "score": 0.15896519992112562, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.6035836275599532, - "sentence_nr": 0 + "score": 0.29513999460654694, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.4839792901878845, - "sentence_nr": 0 + "score": 0.1423412184218882, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.6380327835059832, - "sentence_nr": 0 + "score": 0.2596718628394258, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.433056028408153, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.6068335862669254, - "sentence_nr": 0 + "score": 0.17615962296513688, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.24431474547499252, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.37696437834356655, - "sentence_nr": 0 + "score": 0.3120848453730729, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.5617832488367239, - "sentence_nr": 0 + "score": 0.3640187353243162, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.1810501938660849, - "sentence_nr": 0 + "score": 0.11856660123276004, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.4154005351684647, - "sentence_nr": 0 + "score": 0.22066843647070425, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.5018386916018573, - "sentence_nr": 0 + "score": 0.12789533377801793, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.6673891538739279, - "sentence_nr": 0 + "score": 0.2283763803651714, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.6947677373756656, - "sentence_nr": 0 + "score": 0.10511846841633776, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.7941300666655116, - "sentence_nr": 0 + "score": 0.20849775250910704, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.6412098671661826, - "sentence_nr": 0 + "score": 0.10511846841633776, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.7665040244283648, - "sentence_nr": 0 + "score": 0.20849775250910704, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.6045639360711837, - "sentence_nr": 0 + "score": 0.13502367316243039, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.7576570567798335, - "sentence_nr": 0 + "score": 0.2597905925100196, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.5438238038060724, - "sentence_nr": 0 + "score": 0.13834368456410945, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.7060850657954441, - "sentence_nr": 0 + "score": 0.2622351749057803, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.6832136298239752, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.7617777911358293, - "sentence_nr": 0 + "score": 0.17334819740046747, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.6638859619095425, - "sentence_nr": 0 + "score": 0.434975077577922, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.7874224590682172, - "sentence_nr": 0 + "score": 0.44927463951082325, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.6543739381048754, - "sentence_nr": 0 + "score": 0.16805936904720342, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.7768522458527362, - "sentence_nr": 0 + "score": 0.3269489741572828, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.6699094720554168, - "sentence_nr": 0 + "score": 0.16279348731624776, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.8022876242275274, - "sentence_nr": 0 + "score": 0.3149639290246331, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.5886489119980793, - "sentence_nr": 0 + "score": 0.28200049157537727, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.7611944709376643, - "sentence_nr": 0 + "score": 0.40682121687319617, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.5162974106233954, - "sentence_nr": 0 + "score": 0.28200049157537727, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.748545216109632, - "sentence_nr": 0 + "score": 0.3809375829720542, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.6561309661336588, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.7849652413082676, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.24311976929452217, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.5332455436874994, - "sentence_nr": 0 + "score": 0.20069190971412876, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.5821415139431849, - "sentence_nr": 0 + "score": 0.12409597120849801, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.7329539842616807, - "sentence_nr": 0 + "score": 0.19449255248446348, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.3861375213265022, - "sentence_nr": 0 + "score": 0.3571150500823898, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.5122109329134508, - "sentence_nr": 0 + "score": 0.5018967494794737, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.32539921259497445, - "sentence_nr": 0 + "score": 0.3572188192648703, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.5133457276293165, - "sentence_nr": 0 + "score": 0.45381175288762937, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.375079512706724, - "sentence_nr": 0 + "score": 0.3572188192648703, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.5371301483272257, - "sentence_nr": 0 + "score": 0.42937064888927773, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.48456463733283883, - "sentence_nr": 0 + "score": 0.2513073726775429, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.5906105668854662, - "sentence_nr": 0 + "score": 0.3798674638470122, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.2677353447271197, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3576865471454043, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.47662407876184354, - "sentence_nr": 0 + "score": 0.28089241320678904, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.5978297795375753, - "sentence_nr": 0 + "score": 0.39241665461937186, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.4845227999608418, - "sentence_nr": 0 + "score": 0.3571150500823898, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.5968050469845498, - "sentence_nr": 0 + "score": 0.44642876819396304, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.4494703452336724, - "sentence_nr": 0 + "score": 0.3359540718229689, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.5696298539086213, - "sentence_nr": 0 + "score": 0.4335064729510061, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.44981308897808336, - "sentence_nr": 0 + "score": 0.3926492355497174, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.6151723374264357, - "sentence_nr": 0 + "score": 0.4617232533456601, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.4295014616287586, - "sentence_nr": 0 + "score": 0.27274191069381915, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.5957510678657648, - "sentence_nr": 0 + "score": 0.39756349135906077, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.25381494737245897, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.3194196588930621, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.47727404239076743, - "sentence_nr": 0 + "score": 0.37590194925806086, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.6081867525552255, - "sentence_nr": 0 + "score": 0.4642775123967649, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.01656048993031311, - "sentence_nr": 0 + "score": 0.4470252726010778, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.11323797713183678, - "sentence_nr": 0 + "score": 0.5551045432161744, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.4124136266900752, - "sentence_nr": 0 + "score": 0.25951859031182145, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.5714981155807188, - "sentence_nr": 0 + "score": 0.34406582181104034, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.4354194543126476, - "sentence_nr": 0 + "score": 0.13232291594986312, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.6656175329857803, - "sentence_nr": 0 + "score": 0.3103406767609728, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.5941815558294462, - "sentence_nr": 0 + "score": 0.2295748846661433, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.7983203558832153, - "sentence_nr": 0 + "score": 0.3277581848986239, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.4524360012660941, - "sentence_nr": 0 + "score": 0.27334594211112967, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.6715594086896963, - "sentence_nr": 0 + "score": 0.4045014812005364, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.3355428780074198, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.6348737822745005, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.35291113737574475, - "sentence_nr": 0 + "score": 0.11902001907030836, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.6466347966614083, - "sentence_nr": 0 + "score": 0.2714975491916872, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.47269414327373943, - "sentence_nr": 0 + "score": 0.19040700845445938, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.7451099574206652, - "sentence_nr": 0 + "score": 0.354176495487078, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.4216321717480383, - "sentence_nr": 0 + "score": 0.10682827247639556, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.6897801155729197, - "sentence_nr": 0 + "score": 0.21551117313912851, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.6036019309695121, - "sentence_nr": 0 + "score": 0.07425055521504613, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.7801644741948762, - "sentence_nr": 0 + "score": 0.18122341046764998, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.43249073282965117, - "sentence_nr": 0 + "score": 0.16352670859125373, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.6444897357478733, - "sentence_nr": 0 + "score": 0.26406333983187025, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.28607621833944535, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.6463343859934777, - "sentence_nr": 0 + "score": 0.14974959199825547, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.415412929081021, - "sentence_nr": 0 + "score": 0.10640850690356463, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.6437233280372863, - "sentence_nr": 0 + "score": 0.1954606751850304, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.27979942810555614, - "sentence_nr": 0 + "score": 0.1022875701616399, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.5926090214839685, - "sentence_nr": 0 + "score": 0.222885061328114, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.2995846558655927, - "sentence_nr": 0 + "score": 0.08968235248346597, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.599404823793189, - "sentence_nr": 0 + "score": 0.2175311081388801, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.3983045920261205, - "sentence_nr": 0 + "score": 0.07174630278990472, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.6521777108605036, - "sentence_nr": 0 + "score": 0.17633434609960705, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.268203877206376, - "sentence_nr": 0 + "score": 0.06695900686562914, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.5791297455379081, - "sentence_nr": 0 + "score": 0.17180011357991665, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.22766536738739604, - "sentence_nr": 0 + "score": 0.10640850690356463, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.5321260120854782, - "sentence_nr": 0 + "score": 0.152668380659781, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.14981855747310632, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.5073561650857479, - "sentence_nr": 0 + "score": 0.17259582659449696, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.40263336117444953, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.656129562811693, - "sentence_nr": 0 + "score": 0.09918661835916037, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.4143449478847806, - "sentence_nr": 0 + "score": 0.10601317434781207, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.6736569430464404, - "sentence_nr": 0 + "score": 0.2251385457970532, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.20430195455630867, - "sentence_nr": 0 + "score": 0.16330657473945515, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.5336103118914343, - "sentence_nr": 0 + "score": 0.33688142820108186, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.4715421308516199, - "sentence_nr": 0 + "score": 0.09812163258584553, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.7046296108422225, - "sentence_nr": 0 + "score": 0.23909785111906673, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.38187621052323667, - "sentence_nr": 0 + "score": 0.10401577613691954, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.6423162801762098, - "sentence_nr": 0 + "score": 0.16184347717072042, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.10601317434781207, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.2550511802903305, - "sentence_nr": 0 + "score": 0.2305932305892788, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.3491792142373769, - "sentence_nr": 0 + "score": 0.10851354579652327, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.6341002242155772, - "sentence_nr": 0 + "score": 0.1503151900202639, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.001753155680224404, - "sentence_nr": 0 + "score": 0.006827911047017742, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.19348048287912908, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "translation_to", - "metric": "chrf", - "score": 0.5498004622015639, - "sentence_nr": 0 + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.2164910348876327, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.17329990217896798, - "sentence_nr": 0 + "score": 0.2774527633525211, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.583781848253705, - "sentence_nr": 0 + "score": 0.4358323759361012, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.3373556859342653, - "sentence_nr": 0 + "score": 0.1978585723043446, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.6738789170291255, - "sentence_nr": 0 + "score": 0.3527599187160617, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.4059531821242849, - "sentence_nr": 0 + "score": 0.3021375397356768, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.7392702727394752, - "sentence_nr": 0 + "score": 0.460354013765958, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.4658089028058827, - "sentence_nr": 0 + "score": 0.12991916506579942, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.7584810978753719, - "sentence_nr": 0 + "score": 0.27889392608860697, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.10765326248076237, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.2268211743213014, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.3725685850787146, - "sentence_nr": 0 + "score": 0.1029370476768733, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.6944058070280722, - "sentence_nr": 0 + "score": 0.25083473307234855, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.3720001389308944, - "sentence_nr": 0 + "score": 0.1472462377094902, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.7013845085492982, - "sentence_nr": 0 + "score": 0.30525310195831357, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.10682827247639556, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.18978045999305448, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.09941490945601678, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.22897188670060376, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.3649036594689345, - "sentence_nr": 0 + "score": 0.1959280139287724, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.6887265942100023, - "sentence_nr": 0 + "score": 0.36708131749832384, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.36343365059404575, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.432454547955832, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.15326741815269776, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.31415720375698164, + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.4058364743511898, - "sentence_nr": 0 + "score": 0.5805399561362194, + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.7590590041728482, - "sentence_nr": 0 + "score": 0.6804343591858393, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.33490167163730483, - "sentence_nr": 0 + "score": 0.13796620851017113, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.6738850345838133, - "sentence_nr": 0 + "score": 0.3011443479254443, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 0 + "score": 0.3166144686275811, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.39315143982598805, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.45475498440082013, - "sentence_nr": 0 + "score": 0.3761648431086742, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.7890162768101745, - "sentence_nr": 0 + "score": 0.5466736282576272, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.2910873658777246, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.40017491309543407, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.10884267615693713, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.24899186333705978, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 0 + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.001410039481105471, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.33415579274035306, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.676792081658235, - "sentence_nr": 0 + "score": 0.26007925058007886, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.4446730260276365, - "sentence_nr": 0 + "score": 0.5199302229930708, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.755908468739292, - "sentence_nr": 0 + "score": 0.6017481019884499, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.43478132178539325, - "sentence_nr": 0 + "score": 0.2523019529343173, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.7018196083546635, - "sentence_nr": 0 + "score": 0.4406369072888057, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.3877278798081724, - "sentence_nr": 0 + "score": 0.4195959935514934, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.6833268596614586, - "sentence_nr": 0 + "score": 0.5643628666004862, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.37529811264444257, - "sentence_nr": 0 + "score": 0.35974578964005544, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.6378039240510625, - "sentence_nr": 0 + "score": 0.5294218015563622, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.2934521273973611, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.4075394810720701, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.400011517795393, - "sentence_nr": 0 + "score": 0.2616747576701774, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.6786441872043172, - "sentence_nr": 0 + "score": 0.39134557450212215, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.4494597917400064, - "sentence_nr": 0 + "score": 0.31314224813827346, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.7073438158390085, - "sentence_nr": 0 + "score": 0.3932583887521134, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.35036401492739216, - "sentence_nr": 0 + "score": 0.2921936290725188, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.6426566938482169, - "sentence_nr": 0 + "score": 0.43767787354013643, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.511936592363045, - "sentence_nr": 0 + "score": 0.2921936290725188, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.7671481459096147, - "sentence_nr": 0 + "score": 0.43767787354013643, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.46625439194767143, - "sentence_nr": 0 + "score": 0.3615855225145535, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.7058896647604742, - "sentence_nr": 0 + "score": 0.4838257582776513, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.05246151962748318, - "sentence_nr": 0 + "score": 0.4501609222100726, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.3468871001472823, - "sentence_nr": 0 + "score": 0.5565610322131652, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.4315505266952602, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 + "score": 0.4766479849266233, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.17314327152587822, - "sentence_nr": 0 + "score": 0.4671778989333441, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.5307830562814513, - "sentence_nr": 0 + "score": 0.583541922992295, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.42907605083910527, - "sentence_nr": 0 + "score": 0.44411712310948115, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.6942504376084577, - "sentence_nr": 0 + "score": 0.5742558726077623, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.28038937103419465, - "sentence_nr": 0 + "score": 0.4671778989333441, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.5038494750471553, - "sentence_nr": 0 + "score": 0.5801543735794272, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.25718495991757767, - "sentence_nr": 0 + "score": 0.4501609222100726, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.4369258127692308, - "sentence_nr": 0 + "score": 0.5779114321769039, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.3035527425754751, - "sentence_nr": 0 + "score": 0.4501609222100726, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.47611727660210634, - "sentence_nr": 0 + "score": 0.5779114321769039, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 + "score": 0.2633684431598725, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.003762227238525207, - "sentence_nr": 0 + "score": 0.4212549338281654, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.30019255581073173, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.49869889490439867, - "sentence_nr": 0 + "score": 0.06170484898727104, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.3268845394941929, - "sentence_nr": 0 + "score": 0.27199778234956107, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.5177343510524726, - "sentence_nr": 0 + "score": 0.4129085501138227, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.3293368889413212, - "sentence_nr": 0 + "score": 0.19230188007838597, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.4868090313482157, - "sentence_nr": 0 + "score": 0.3407021378942239, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.4592260218476326, - "sentence_nr": 0 + "score": 0.41072675483179805, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.6081527520886763, - "sentence_nr": 0 + "score": 0.5635589150380774, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.3597572114060291, - "sentence_nr": 0 + "score": 0.4122974402951816, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.5404525494108054, - "sentence_nr": 0 + "score": 0.49812945858788304, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.06607086144978286, - "sentence_nr": 0 + "score": 0.09812163258584553, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.2742600239887939, - "sentence_nr": 0 + "score": 0.2826014149329834, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.3276986983391062, - "sentence_nr": 0 + "score": 0.29486296661855094, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.47017897902052863, - "sentence_nr": 0 + "score": 0.37572427578597467, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.24830299714202062, - "sentence_nr": 0 + "score": 0.2637873055942232, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.39688820338442954, - "sentence_nr": 0 + "score": 0.30768668074852223, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.31950891970955725, - "sentence_nr": 0 + "score": 0.24728515687112834, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.48966432562692086, - "sentence_nr": 0 + "score": 0.31221693968406194, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.3880515884750121, - "sentence_nr": 1 + "score": 0.2042128370387497, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.6587916715823183, - "sentence_nr": 1 + "score": 0.35890293750629537, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.5142726846179982, - "sentence_nr": 1 + "score": 0.2042128370387497, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.7344716263345912, - "sentence_nr": 1 + "score": 0.35890293750629537, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.6066498620510337, - "sentence_nr": 1 + "score": 0.16954555291875742, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.7812137754227463, - "sentence_nr": 1 + "score": 0.31800856700824265, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.4342750764549485, - "sentence_nr": 1 + "score": 0.21004850229269242, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.7115011221714777, - "sentence_nr": 1 + "score": 0.2695278236063314, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.305730257543728, - "sentence_nr": 1 + "score": 0.24328221242512443, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.6305034408922697, - "sentence_nr": 1 + "score": 0.3587841404695592, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.28822910320599077, - "sentence_nr": 1 + "score": 0.6052987576779449, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.6087031937056202, - "sentence_nr": 1 + "score": 0.6657086815551626, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.5011893046413795, - "sentence_nr": 1 + "score": 0.6064630666233242, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.7089203664957927, - "sentence_nr": 1 + "score": 0.6752055521830945, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.48186321118136805, - "sentence_nr": 1 + "score": 0.41489895705463226, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.7050536782800418, - "sentence_nr": 1 + "score": 0.5351666707169298, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.3406014428030703, - "sentence_nr": 1 + "score": 0.2505523539251516, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.6514548680180557, - "sentence_nr": 1 + "score": 0.3831975160557709, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.4738611152748619, - "sentence_nr": 1 + "score": 0.23244223633311675, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.7293997939434749, - "sentence_nr": 1 + "score": 0.35864519940131245, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.4492327786840591, - "sentence_nr": 1 + "score": 0.09694361543655163, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.6917786880624969, - "sentence_nr": 1 + "score": 0.2353989148941136, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.5105553787243322, - "sentence_nr": 1 + "score": 0.03616809285846403, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.44571331402556874, - "sentence_nr": 1 + "score": 0.19004145843928574, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.67235059873138, - "sentence_nr": 1 + "score": 0.3699750032227405, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.4487746167679644, - "sentence_nr": 1 + "score": 0.580451128369423, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.4476730201191672, - "sentence_nr": 1 + "score": 0.6612342258381259, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.2836623400057614, - "sentence_nr": 1 + "score": 0.3883375900135818, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.29147337237183046, - "sentence_nr": 1 + "score": 0.4643731845106876, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.2775905064108025, - "sentence_nr": 1 + "score": 0.11556647985416685, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.3165767280260291, - "sentence_nr": 1 + "score": 0.23748323325414847, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.28912432952036243, - "sentence_nr": 1 + "score": 0.2589451141492935, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.31119603942667584, - "sentence_nr": 1 + "score": 0.36265302427150575, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.1157064510816097, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.24091233899862727, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.3652139786200916, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.3824205406345779, - "sentence_nr": 1 + "score": 0.18432769897186205, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.4094748015187699, - "sentence_nr": 1 + "score": 0.12045422179467957, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.4288513205758089, - "sentence_nr": 1 + "score": 0.22184013352319704, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation_to", - "metric": "bleu", - "score": 0.35430370029300495, - "sentence_nr": 1 + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.1143433820088083, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.3864890531682498, - "sentence_nr": 1 + "score": 0.20225927443573538, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.5702271610495845, - "sentence_nr": 1 + "score": 0.1494197912720898, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.5772088119985683, - "sentence_nr": 1 + "score": 0.2347938067180855, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.37405604379521823, - "sentence_nr": 1 + "score": 0.21223633441554032, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.409758558051675, - "sentence_nr": 1 + "score": 0.31520576641768766, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.4082186610925126, - "sentence_nr": 1 + "score": 0.385934808400546, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.4042514356445265, - "sentence_nr": 1 + "score": 0.38257849677977857, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.3835611536417376, - "sentence_nr": 1 + "score": 0.11563084454999199, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.41360439536029553, - "sentence_nr": 1 + "score": 0.24178063211164538, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.19333361726926898, - "sentence_nr": 1 + "score": 0.3925121365052661, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.28056620588920506, - "sentence_nr": 1 + "score": 0.49647175863031867, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.3541652369790141, - "sentence_nr": 1 + "score": 0.17846877279661646, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.38739546241623046, - "sentence_nr": 1 + "score": 0.28232267884836304, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.2113054108348111, - "sentence_nr": 1 + "score": 0.1532685994792829, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.44238229987470284, - "sentence_nr": 1 + "score": 0.2563896884841204, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.26207903587847736, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.50073123223194, - "sentence_nr": 1 + "score": 0.25957530464210776, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.16098073041469485, - "sentence_nr": 1 + "score": 0.1691896235111172, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.39710375075643284, - "sentence_nr": 1 + "score": 0.30689082834840276, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.11465623153412556, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.4497512968651573, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.1866741141650009, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.46590330015791137, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.16950698451288215, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.48668984177868246, - "sentence_nr": 1 + "score": 0.18223058800557917, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.23516650478671175, - "sentence_nr": 1 + "score": 0.6064630666233242, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.4885052730214997, - "sentence_nr": 1 + "score": 0.6712747226800536, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.30632520148437686, - "sentence_nr": 1 + "score": 0.6242817472465665, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.5904333377596244, - "sentence_nr": 1 + "score": 0.7123666275414222, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.23477037244978113, - "sentence_nr": 1 + "score": 0.580451128369423, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.5165217514090542, - "sentence_nr": 1 + "score": 0.6560788161152474, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.21585895003952446, - "sentence_nr": 1 + "score": 0.25841450487427714, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.48140875917864023, - "sentence_nr": 1 + "score": 0.44494784218322847, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.2711981710401392, - "sentence_nr": 1 + "score": 0.22743363869750483, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.5555651822168547, - "sentence_nr": 1 + "score": 0.41347079879706106, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.168777027092081, - "sentence_nr": 1 + "score": 0.3360020594873999, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.4368481165562445, - "sentence_nr": 1 + "score": 0.4979723869498355, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.22415590998535484, - "sentence_nr": 1 + "score": 0.33062429129755794, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.49981791926190994, - "sentence_nr": 1 + "score": 0.4887128900317842, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.4556160153884204, - "sentence_nr": 1 + "score": 0.2951516238086372, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.6661994452325181, - "sentence_nr": 1 + "score": 0.40065144019204096, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.3216756020053242, - "sentence_nr": 1 + "score": 0.2830789070123405, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.6141241026166391, - "sentence_nr": 1 + "score": 0.4006557494747156, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.43369048469848437, - "sentence_nr": 1 + "score": 0.18279744869146425, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.6586872889176818, - "sentence_nr": 1 + "score": 0.4024804882630303, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.4098419224543478, - "sentence_nr": 1 + "score": 0.4461240556373289, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.6358736384460296, - "sentence_nr": 1 + "score": 0.6245673099524237, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "es", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.4310064928034671, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "es", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.6681893438144855, - "sentence_nr": 1 + "score": 0.10050942388421231, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.4527112325797497, - "sentence_nr": 1 + "score": 0.42988105429544615, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.6708989870027865, - "sentence_nr": 1 + "score": 0.5765642529796587, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.403282335120862, - "sentence_nr": 1 + "score": 0.49764480798089855, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.6319223068216205, - "sentence_nr": 1 + "score": 0.6328520698047967, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.47486558569605275, - "sentence_nr": 1 + "score": 0.3360020594873999, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.6916379662719394, - "sentence_nr": 1 + "score": 0.4979723869498355, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.44234482870142466, - "sentence_nr": 1 + "score": 0.2676032275663791, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.6617260327319175, - "sentence_nr": 1 + "score": 0.40689458580141896, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.3870043562676652, - "sentence_nr": 1 + "score": 0.3839162951943952, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.631536050216449, - "sentence_nr": 1 + "score": 0.5154389469360604, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.465541200947692, - "sentence_nr": 1 + "score": 0.3307203628924353, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.6735988737803571, - "sentence_nr": 1 + "score": 0.49086425308764237, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.37462930793644134, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.6297969107438809, - "sentence_nr": 1 + "score": 0.22319449652482443, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.45236333724230443, - "sentence_nr": 1 + "score": 0.233078149078302, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.6557435747309683, - "sentence_nr": 1 + "score": 0.41167690646865734, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.37570809340937233, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.6339141734561076, - "sentence_nr": 1 + "score": 0.14728954312449322, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.559332422592187, - "sentence_nr": 1 + "score": 0.2256490809237466, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.733291190094771, - "sentence_nr": 1 + "score": 0.37300331821940047, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.48457382450313924, - "sentence_nr": 1 + "score": 0.24883108274644028, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.7144409873446065, - "sentence_nr": 1 + "score": 0.3329995141432608, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.4881942815467274, - "sentence_nr": 1 + "score": 0.13827175716697776, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.6662053431593723, - "sentence_nr": 1 + "score": 0.25880112791050663, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.12219667481477892, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.2405164296456886, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.5216581079910853, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.71892039370478, - "sentence_nr": 1 + "score": 0.1316478146760647, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.3977038258772401, - "sentence_nr": 1 + "score": 0.13536681105774234, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.6202897864314184, - "sentence_nr": 1 + "score": 0.23595972523293418, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.07939087147543496, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.11006637454907114, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.07939087147543496, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.11006637454907114, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.5024073848733999, - "sentence_nr": 1 + "score": 0.11856660123276004, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.6805608953669952, - "sentence_nr": 1 + "score": 0.21303170584506914, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.1649415532676442, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.09865498818536263, + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.5531306492249056, - "sentence_nr": 1 + "score": 0.4027788021844848, + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.7508908077947477, - "sentence_nr": 1 + "score": 0.5140425250930961, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.4198435178617755, - "sentence_nr": 1 + "score": 0.2536955091952947, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.6615330486958915, - "sentence_nr": 1 + "score": 0.44037464621764255, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 1 + "score": 0.26505211456170086, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3924854246504529, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.4423392581565186, - "sentence_nr": 1 + "score": 0.1601125708485386, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.663370348519268, - "sentence_nr": 1 + "score": 0.23752778281494039, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.1437791910008819, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.2548981391955141, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.1200550027704878, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.42298863290550076, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.6260112466527037, - "sentence_nr": 1 + "score": 0.033908919446183204, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.422714343026006, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.6266965858252854, - "sentence_nr": 1 + "score": 0.15843589640881697, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.3446592076818278, - "sentence_nr": 1 + "score": 0.7795149903947967, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.5819912583909785, - "sentence_nr": 1 + "score": 0.8912732146280626, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.23270938096152352, - "sentence_nr": 1 + "score": 0.6230832293767097, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.4490269267329941, - "sentence_nr": 1 + "score": 0.6994652193905146, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.36763082847636347, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.11634468327243708, - "sentence_nr": 1 + "score": 0.4293667924436175, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.1766119944524977, - "sentence_nr": 1 + "score": 0.43728553857900826, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.3986479587107995, - "sentence_nr": 1 + "score": 0.43806460289276367, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.2729012183957552, - "sentence_nr": 1 + "score": 0.3535749730730369, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.4275664216118961, - "sentence_nr": 1 + "score": 0.40456707763346417, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.3167585643537871, - "sentence_nr": 1 + "score": 0.3615855225145535, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.5076869840147092, - "sentence_nr": 1 + "score": 0.40635491179072764, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.34791594751284466, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.3292454551002283, - "sentence_nr": 1 + "score": 0.4062384532979022, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.39969214713924245, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.09073980329024364, - "sentence_nr": 1 + "score": 0.43942997265104256, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.25751023494151143, - "sentence_nr": 1 + "score": 0.39969214713924245, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.4697665795408892, - "sentence_nr": 1 + "score": 0.43942997265104256, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 1 + "score": 0.41546060026113085, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4663335536810786, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.3212983212315964, - "sentence_nr": 1 + "score": 0.4390960897971484, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.5599573621112933, - "sentence_nr": 1 + "score": 0.5164819146651056, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.2560040742784669, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.09236883467211593, - "sentence_nr": 1 + "score": 0.4525447568689241, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.3472719365557752, - "sentence_nr": 1 + "score": 0.6016367461945803, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.529527758323629, - "sentence_nr": 1 + "score": 0.4246183605185108, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.6540432510655854, - "sentence_nr": 1 + "score": 0.5045638981564646, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.49704232910799745, - "sentence_nr": 1 + "score": 0.4578226095312774, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.6453248294274054, - "sentence_nr": 1 + "score": 0.5429809487027987, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.49704232910799745, - "sentence_nr": 1 + "score": 0.4390960897971484, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.6453248294274054, - "sentence_nr": 1 + "score": 0.5164819146651056, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.5494410974163585, - "sentence_nr": 1 + "score": 0.409211292187266, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.6853937472090788, - "sentence_nr": 1 + "score": 0.39189287710585047, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.5201554059039623, - "sentence_nr": 1 + "score": 0.27748702735605824, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.6912283534522488, - "sentence_nr": 1 + "score": 0.371892873556356, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.44114781827798216, - "sentence_nr": 1 + "score": 0.1600733593956426, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.6241365710582877, - "sentence_nr": 1 + "score": 0.2350173429055301, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.4286794450695727, - "sentence_nr": 1 + "score": 0.27182269429130823, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.6365023289177463, - "sentence_nr": 1 + "score": 0.3135936384682831, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.48149733895714797, - "sentence_nr": 1 + "score": 0.30389058699653954, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.6355577992154319, - "sentence_nr": 1 + "score": 0.4310896909809194, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.4744991305294048, - "sentence_nr": 1 + "score": 0.3763278728427448, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.6720481841701565, - "sentence_nr": 1 + "score": 0.4261526683335186, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 1 + "score": 0.36210097004176117, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4423339372603474, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.4946489712934811, - "sentence_nr": 1 + "score": 0.3763278728427448, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.6635756951391838, - "sentence_nr": 1 + "score": 0.4047854120649662, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.32345422777393923, - "sentence_nr": 1 + "score": 0.38656953855321047, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.5652905380017423, - "sentence_nr": 1 + "score": 0.3844238861690475, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.473424955479643, - "sentence_nr": 1 + "score": 0.3917196589390866, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.6791725069180572, - "sentence_nr": 1 + "score": 0.3964644743683115, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.3479698393875884, - "sentence_nr": 1 + "score": 0.33737554588923646, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.5760833125751785, - "sentence_nr": 1 + "score": 0.369875665962309, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.24373253714463095, - "sentence_nr": 1 + "score": 0.1423412184218882, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.49482039214573803, - "sentence_nr": 1 + "score": 0.31142792394410035, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.312050635062637, - "sentence_nr": 1 + "score": 0.1863161938392376, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.5390444512132623, - "sentence_nr": 1 + "score": 0.36138028769379077, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.24229889794871173, - "sentence_nr": 1 + "score": 0.3780488661667278, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.4853505495636382, - "sentence_nr": 1 + "score": 0.38749538363085073, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.25483341226864464, - "sentence_nr": 1 + "score": 0.3780488661667278, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.4565768361133673, - "sentence_nr": 1 + "score": 0.38749538363085073, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.3542266508664836, - "sentence_nr": 1 + "score": 0.3040122368256446, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.5643413028542406, - "sentence_nr": 1 + "score": 0.3640142383589547, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.18282456123768265, - "sentence_nr": 1 + "score": 0.443579034212419, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.47540661243586124, - "sentence_nr": 1 + "score": 0.48854522816542867, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.3721154325198309, - "sentence_nr": 1 + "score": 0.443579034212419, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.6214976185877734, - "sentence_nr": 1 + "score": 0.48854522816542867, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.2921982022041547, - "sentence_nr": 1 + "score": 0.4246183605185108, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.5264166199754001, - "sentence_nr": 1 + "score": 0.501948957312799, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 1 + "score": 0.32001589569502475, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 1 + "score": 0.41050505670086324, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.3142825719425009, - "sentence_nr": 1 + "score": 0.32522111306534696, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.584353897647861, - "sentence_nr": 1 + "score": 0.3743770394908015, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.1767445234920605, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.30304368225621264, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.21163700429456012, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 1 + "score": 0.2754475815887392, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.17473028966988555, - "sentence_nr": 1 + "score": 0.25437706194445847, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.400425072418037, - "sentence_nr": 1 + "score": 0.28858721839661267, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.2988697040013311, - "sentence_nr": 1 + "score": 0.39022736644855677, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.5442522660489195, - "sentence_nr": 1 + "score": 0.5202587069271436, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.500703635659656, - "sentence_nr": 1 + "score": 0.3672404084841361, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.6501904887399698, - "sentence_nr": 1 + "score": 0.4518744271362045, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.4876463179677598, - "sentence_nr": 1 + "score": 0.34332704063408953, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.6113405963585182, - "sentence_nr": 1 + "score": 0.408639131209588, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.5199813503697857, - "sentence_nr": 1 + "score": 0.3672404084841361, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.6584629522606407, - "sentence_nr": 1 + "score": 0.43288682804187184, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.3924259174695316, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.4246539836622663, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.4767709962267032, - "sentence_nr": 1 + "score": 0.34791594751284466, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.6259826462063701, - "sentence_nr": 1 + "score": 0.39981345815021024, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.3995439803178399, - "sentence_nr": 1 + "score": 0.3924259174695316, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.6021193793256325, - "sentence_nr": 1 + "score": 0.4246539836622663, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.21448629472025388, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.3142841653684826, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.21448629472025388, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.31086886046532736, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.40656183899584336, - "sentence_nr": 1 + "score": 0.3120848453730729, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.5890799945028116, - "sentence_nr": 1 + "score": 0.3705328519354151, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.38656953855321047, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.3880470780323705, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.20477156411200437, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.30216175609979257, + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.49952715015218047, - "sentence_nr": 1 + "score": 0.2082633426637247, + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.6422528647342378, - "sentence_nr": 1 + "score": 0.43955021643262504, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.528547004876945, - "sentence_nr": 1 + "score": 0.4185938787651429, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.6459593469343872, - "sentence_nr": 1 + "score": 0.49815219590152665, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 1 + "score": 0.18059154473936082, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3333808784117231, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.45002572171222577, - "sentence_nr": 1 + "score": 0.27334594211112967, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.620458099259989, - "sentence_nr": 1 + "score": 0.35690726265980793, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.41546060026113085, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.4397124902714759, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.27326653732219497, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.3528072458588813, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.27875207406965286, - "sentence_nr": 1 + "score": 0.31277600813200596, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.5095968928696253, - "sentence_nr": 1 + "score": 0.3423771857129358, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.5576102993622991, - "sentence_nr": 1 + "score": 0.25944320225692963, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.6640761861237344, - "sentence_nr": 1 + "score": 0.3049825437064534, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.43330223254789785, - "sentence_nr": 1 + "score": 0.25383339228798274, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.5564499529933307, - "sentence_nr": 1 + "score": 0.46123461430035645, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.1466607445607986, - "sentence_nr": 1 + "score": 0.16401798649868696, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.36552963821230766, - "sentence_nr": 1 + "score": 0.3760928911869727, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.20527494029659898, - "sentence_nr": 1 + "score": 0.31756348590173983, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.43586475049009993, - "sentence_nr": 1 + "score": 0.612638372401986, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.3047577636054668, - "sentence_nr": 1 + "score": 0.1535259783865636, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.48318512703629857, - "sentence_nr": 1 + "score": 0.35449697447586703, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.16510240061590087, - "sentence_nr": 1 + "score": 0.23966219681055514, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.37927849234648453, - "sentence_nr": 1 + "score": 0.5164809275250435, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.2993081268625724, - "sentence_nr": 1 + "score": 0.13750902305262408, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.47777429598730525, - "sentence_nr": 1 + "score": 0.3476437557836216, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.12340057804403023, - "sentence_nr": 1 + "score": 0.21027545940631823, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.3331532512757645, - "sentence_nr": 1 + "score": 0.4572185175571455, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.469516870711376, - "sentence_nr": 1 + "score": 0.09812163258584553, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.6115882471919187, - "sentence_nr": 1 + "score": 0.2684447524373158, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.3196191720459511, - "sentence_nr": 1 + "score": 0.09500266321476487, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.4758634857690128, - "sentence_nr": 1 + "score": 0.2717648186978408, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.06692436199443168, - "sentence_nr": 1 + "score": 0.14113991930789777, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.23947877713211682, - "sentence_nr": 1 + "score": 0.33851813032056655, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.3095674062940522, - "sentence_nr": 1 + "score": 0.1462806365365753, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.49847201920427264, - "sentence_nr": 1 + "score": 0.3207408262155079, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.08748671768279999, - "sentence_nr": 1 + "score": 0.2560038657424253, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.01250047619586174, - "sentence_nr": 1 + "score": 0.5204944454433559, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.12383271014582256, - "sentence_nr": 1 + "score": 0.6659376403204792, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.41807822202441103, - "sentence_nr": 1 + "score": 0.5040802179792181, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.577545891208518, - "sentence_nr": 1 + "score": 0.6292677087296895, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.3475258894340562, - "sentence_nr": 1 + "score": 0.25383339228798274, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.5729813197277963, - "sentence_nr": 1 + "score": 0.46285845798510755, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.4610791064938662, - "sentence_nr": 1 + "score": 0.307137308263447, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.5982792041883009, - "sentence_nr": 1 + "score": 0.6108807661013372, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.4642320266834861, - "sentence_nr": 1 + "score": 0.31756348590173983, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.6555816107178817, - "sentence_nr": 1 + "score": 0.612638372401986, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.38823222788076894, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.5684042820738361, - "sentence_nr": 1 + "score": 0.22797131720811048, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.35312894221988256, - "sentence_nr": 1 + "score": 0.104552581027927, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.5483853808672988, - "sentence_nr": 1 + "score": 0.22766480821275292, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.5148124488217735, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.6443063241500353, - "sentence_nr": 1 + "score": 0.2140405179077785, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.6009332869110189, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.7201156007131091, - "sentence_nr": 1 + "score": 0.17807129401511626, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.6427952895393818, - "sentence_nr": 1 + "score": 0.07843772989359644, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.7611062226622591, - "sentence_nr": 1 + "score": 0.2669076052967215, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.5772497332582994, - "sentence_nr": 1 + "score": 0.12913533075470382, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.6924288924783911, - "sentence_nr": 1 + "score": 0.25219595014343127, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.5908781325191875, - "sentence_nr": 1 + "score": 0.108043996762779, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.7135251491959551, - "sentence_nr": 1 + "score": 0.24726477214045167, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.3202249300680136, - "sentence_nr": 1 + "score": 0.22018448209829633, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.5240877863757325, - "sentence_nr": 1 + "score": 0.2705349647587311, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.3021474642420404, - "sentence_nr": 1 + "score": 0.08635800047213174, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.49488718577709084, - "sentence_nr": 1 + "score": 0.23875573724774168, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.20238796310390209, - "sentence_nr": 1 + "score": 0.11414633188690328, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.5665467522687606, - "sentence_nr": 1 + "score": 0.2503197875391322, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.4612469192468151, - "sentence_nr": 1 + "score": 0.16156345887749107, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.689540484203802, - "sentence_nr": 1 + "score": 0.3073644007222291, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.4688149931788708, - "sentence_nr": 1 + "score": 0.16156345887749107, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.6935921004770637, - "sentence_nr": 1 + "score": 0.3073644007222291, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.31907001507985117, - "sentence_nr": 1 + "score": 0.07418182487612639, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.6396787125445289, - "sentence_nr": 1 + "score": 0.26363958328353637, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.2589941364530944, - "sentence_nr": 1 + "score": 0.11450137919698138, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.5932208556546101, - "sentence_nr": 1 + "score": 0.23075195332816217, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.3473313422920779, - "sentence_nr": 1 + "score": 0.24822495192832897, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.6495220842154038, - "sentence_nr": 1 + "score": 0.31986636367127785, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.15605718228191343, - "sentence_nr": 1 + "score": 0.15082713742973322, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.48559902973042135, - "sentence_nr": 1 + "score": 0.2362536212361225, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.4537382610431785, - "sentence_nr": 1 + "score": 0.24671763489589052, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.6984640606230582, - "sentence_nr": 1 + "score": 0.31806081798541963, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "translation_to", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.48361343491637904, - "sentence_nr": 1 + "score": 0.1022763758993479, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "translation_to", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.7036055457806847, - "sentence_nr": 1 + "score": 0.2461976716440084, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 1 + "score": 0.11092770141728163, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 1 + "score": 0.24752626772001793, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.38248883198762607, - "sentence_nr": 1 + "score": 0.10788569011528462, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.6879676788132258, - "sentence_nr": 1 + "score": 0.24852458431667404, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.1770481008376135, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "translation_to", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "translation_to", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.13828520206051664, - "sentence_nr": 1 + "score": 0.1179949261549654, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "translation_to", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.1943642066325126, - "sentence_nr": 1 + "score": 0.108043996762779, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "translation_to", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.5437845506934148, - "sentence_nr": 1 + "score": 0.2560670416470495, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.45718638941364104, - "sentence_nr": 1 + "score": 0.3029928206533524, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.6765316874457515, - "sentence_nr": 1 + "score": 0.45262153733641225, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.2842437601270078, - "sentence_nr": 1 + "score": 0.43000007605628365, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.616554183160495, - "sentence_nr": 1 + "score": 0.5073076708050636, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.2790392444035496, - "sentence_nr": 1 + "score": 0.4125656013737127, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.61628786229813, - "sentence_nr": 1 + "score": 0.4621663225660634, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.1969296745448077, - "sentence_nr": 1 + "score": 0.3512502252598613, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.5972281991369082, - "sentence_nr": 1 + "score": 0.43869157736529, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "id", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.33159620794115396, - "sentence_nr": 1 + "score": 0.17414675099971177, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "id", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.6420436747364323, - "sentence_nr": 1 + "score": 0.28584149811217957, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.3319181496626261, - "sentence_nr": 1 + "score": 0.17268932789342512, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.6296213700542458, - "sentence_nr": 1 + "score": 0.2845292485005295, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.3239007562192641, - "sentence_nr": 1 + "score": 0.18105048502088059, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.632048088218684, - "sentence_nr": 1 + "score": 0.271054087912132, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.3728937115586142, - "sentence_nr": 1 + "score": 0.14908960803395838, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.6577808113928253, - "sentence_nr": 1 + "score": 0.1969983001676391, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.3668099116926436, - "sentence_nr": 1 + "score": 0.14908960803395838, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.6472192230833502, - "sentence_nr": 1 + "score": 0.1969983001676391, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.3437020087720264, - "sentence_nr": 1 + "score": 0.10991514729498916, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.6253594148257299, - "sentence_nr": 1 + "score": 0.2549321593272589, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.2912353795089198, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.6107975281071784, - "sentence_nr": 1 + "score": 0.18130004952029985, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.2493518765085485, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.5378176060849199, - "sentence_nr": 1 + "score": 0.20158624707371023, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.3199241796546606, - "sentence_nr": 1 + "score": 0.44776047557667586, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.6151079711025308, - "sentence_nr": 1 + "score": 0.557570822898273, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.6260866791475674, - "sentence_nr": 1 + "score": 0.4125656013737127, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.7122695616091047, - "sentence_nr": 1 + "score": 0.4889462808086373, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.3856140451435003, - "sentence_nr": 1 + "score": 0.3471636178393148, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.5942230347389459, - "sentence_nr": 1 + "score": 0.4191180487309204, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.47809357926196877, - "sentence_nr": 1 + "score": 0.17493830569974406, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.6515239202890919, - "sentence_nr": 1 + "score": 0.26690261630673184, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.45010740912530395, - "sentence_nr": 1 + "score": 0.1998573974138024, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.6258219368613708, - "sentence_nr": 1 + "score": 0.2618744307213444, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "de", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.4876819889238188, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "de", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.6502930111654278, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.5145500336945869, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.6710712793486331, - "sentence_nr": 1 + "score": 0.13110052718009899, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.6152720493266995, - "sentence_nr": 1 + "score": 0.25678404806291744, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.7135410085152943, - "sentence_nr": 1 + "score": 0.35179752925748303, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.60059205595428, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.7065044125404809, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.6505785903453039, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.7498687328904513, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "translation_to", - "metric": "chrf", "score": 0.0, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "translation_to", - "metric": "bleu", - "score": 0.32481728488530576, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.5086455217662933, - "sentence_nr": 1 + "score": 0.2246029757863831, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.5609895346624986, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.681370247009325, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.025127088788317715, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.21031980892802613, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.14721260533033206, - "sentence_nr": 1 + "score": 0.8003203203844999, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.2721389124032325, - "sentence_nr": 1 + "score": 0.9453478043428296, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.31315822356102974, - "sentence_nr": 1 + "score": 0.8003203203844999, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.37596627611906025, - "sentence_nr": 1 + "score": 0.9453478043428296, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.16777402882140335, - "sentence_nr": 1 + "score": 0.8003203203844999, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.2760884586584135, - "sentence_nr": 1 + "score": 0.9453478043428296, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.2500271340971054, - "sentence_nr": 1 + "score": 0.8003203203844999, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.3785374220544885, - "sentence_nr": 1 + "score": 0.9453478043428296, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.29427156769985635, - "sentence_nr": 1 + "score": 0.8003203203844999, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.38674923884011136, - "sentence_nr": 1 + "score": 0.9453478043428296, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.2923799210557074, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.37275082068184157, - "sentence_nr": 1 + "score": 0.6417603075499863, + "sentence_nr": 4 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.3995850754493543, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 4 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.5087563569873156, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.28256258503905557, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.4403352184603733, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.26468987958610807, - "sentence_nr": 1 + "score": 0.37709297891717664, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.3655402609394103, - "sentence_nr": 1 + "score": 0.6881502501430368, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.08149835562288019, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.22869808598813696, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.12394169238067233, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "translation_to", - "metric": "chrf", - "score": 0.19833625372206998, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "translation_to", - "metric": "bleu", - "score": 0.17671397515361123, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "translation_to", - "metric": "chrf", - "score": 0.2956531697676844, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", - "metric": "bleu", - "score": 0.7017829861193574, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", - "metric": "chrf", - "score": 0.7743327021667388, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", - "metric": "bleu", - "score": 0.6961795371760597, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "deepseek/deepseek-chat", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7859480663394858, - "sentence_nr": 2 + "score": 1.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "microsoft/phi-4", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5643442092080923, - "sentence_nr": 2 + "score": 0.8003203203844999, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "microsoft/phi-4", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7005543453411931, - "sentence_nr": 2 + "score": 0.9453478043428296, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5432312750246535, - "sentence_nr": 2 + "score": 1.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6677259864784132, - "sentence_nr": 2 + "score": 1.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", + "model": "amazon/nova-micro-v1", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.6021903435236307, - "sentence_nr": 2 + "score": 0.8003203203844999, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", + "model": "amazon/nova-micro-v1", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7189230568243182, - "sentence_nr": 2 + "score": 0.9453478043428296, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.6837528314895732, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.7968789890147058, - "sentence_nr": 2 + "score": 0.629039349740581, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.4391684160269219, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.6371098202414471, - "sentence_nr": 2 + "score": 0.629039349740581, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.7191192065120268, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.7900451518124424, - "sentence_nr": 2 + "score": 0.629039349740581, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.7555875294328935, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.8049022687045564, - "sentence_nr": 2 + "score": 0.5581982021478125, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation_to", - "metric": "chrf", "score": 0.0, - "sentence_nr": 2 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation_to", - "metric": "bleu", - "score": 0.6443411340522405, - "sentence_nr": 2 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.7335999563315522, - "sentence_nr": 2 + "score": 0.629039349740581, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.2285369650225378, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.4750387664265888, - "sentence_nr": 2 + "score": 0.6364887816884232, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.5975003598259766, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.7683913390959731, - "sentence_nr": 2 + "score": 0.629039349740581, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.14118350058219528, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.20431837779877604, - "sentence_nr": 2 + "score": 0.629039349740581, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-3.5-turbo", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.1811004938014804, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-3.5-turbo", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.2649993136544717, - "sentence_nr": 2 + "score": 0.629039349740581, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.14089011087858522, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.21944603811527294, - "sentence_nr": 2 + "score": 0.629039349740581, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "mistralai/mistral-saba", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "mistralai/mistral-saba", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.12501819027374758, - "sentence_nr": 2 + "score": 0.5285087045343208, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", + "model": "mistralai/mistral-nemo", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", + "model": "mistralai/mistral-nemo", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.1887796230243076, - "sentence_nr": 2 + "score": 0.652013511062815, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2360941227140328, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.35939098278145853, - "sentence_nr": 2 + "score": 0.629039349740581, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.1273192735797341, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.22231961416584312, - "sentence_nr": 2 + "score": 0.629039349740581, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "google/gemma-3-27b-it", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3487145358887869, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "google/gemma-3-27b-it", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.48564425156445185, - "sentence_nr": 2 + "score": 0.629039349740581, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3360376952328008, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.35297640449956286, - "sentence_nr": 2 + "score": 0.629039349740581, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.13680836462007476, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.24537888283181183, - "sentence_nr": 2 + "score": 0.629039349740581, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "microsoft/phi-4", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.21687218788036394, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "microsoft/phi-4", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.35659125027777805, - "sentence_nr": 2 + "score": 0.629039349740581, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.1017839169529136, - "sentence_nr": 2 + "score": 0.629039349740581, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.14549060082020032, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.22054620758680943, - "sentence_nr": 2 + "score": 0.629039349740581, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.20721924345714232, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.36475932190367044, - "sentence_nr": 2 + "score": 0.43795381992037963, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.11386607947762988, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.33564583347921473, - "sentence_nr": 2 + "score": 0.5881561248602009, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.16862356321891248, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3637462812267946, - "sentence_nr": 2 + "score": 0.46670957224939175, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.1485432117087218, - "sentence_nr": 2 + "score": 0.37544324742239676, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5293274846116299, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.12106878595868109, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3192454012719998, - "sentence_nr": 2 + "score": 0.5344225462130586, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.23649053182388327, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4127382174759535, - "sentence_nr": 2 + "score": 0.49546288984677567, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.40577230815100285, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.4519601247384226, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2392792151449317, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.388678103641788, - "sentence_nr": 2 + "score": 0.41602211217571683, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.18842393723950338, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5622542444405211, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.42966520599840885, + "sentence_nr": 4 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3259738911297118, - "sentence_nr": 2 + "score": 0.18842393723950338, + "sentence_nr": 4 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4954841706551886, - "sentence_nr": 2 + "score": 0.5646801080937621, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.41345800279862177, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.34101364633474157, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5758572581135913, - "sentence_nr": 2 + "score": 0.49713060327965375, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.16195570128532405, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", + "score": 0.581645267684411, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", + "metric": "chrf", + "score": 0.4638613135486478, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.2026639468552004, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "microsoft/phi-4", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4471011187469559, - "sentence_nr": 2 + "score": 0.41734150775835166, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.11622323415479685, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.10826694406224016, - "sentence_nr": 2 + "score": 0.429292711066547, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.180048782148418, - "sentence_nr": 2 + "score": 0.1667955161379731, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3772586334343914, - "sentence_nr": 2 + "score": 0.5848202846227532, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.46442643702863534, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5519480629125156, - "sentence_nr": 2 + "score": 0.41213231348812146, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.6268941789647348, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6958291103494518, - "sentence_nr": 2 + "score": 0.40435987083533204, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4554740717077828, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5498766350188072, - "sentence_nr": 2 + "score": 0.34256683873776383, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.35817810808590844, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5012707040525209, - "sentence_nr": 2 + "score": 0.41477028165511615, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.40435987083533204, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.6297960258710876, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7117676662366008, - "sentence_nr": 2 + "score": 0.33319754264314433, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.570135897056151, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6801332690579707, - "sentence_nr": 2 + "score": 0.40854152133685306, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.40435987083533204, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.40435987083533204, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.349335635815966, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4827709277987172, - "sentence_nr": 2 + "score": 0.4164061298971701, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.34945014016328746, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.37581389131485465, + "sentence_nr": 4 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3947812939950854, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5373384617862703, - "sentence_nr": 2 + "score": 0.40562290854898025, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4692880637764782, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5773610754678101, - "sentence_nr": 2 + "score": 0.3327212134448652, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.34182319563232233, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5052410644804232, - "sentence_nr": 2 + "score": 0.5709936728721758, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5796814083647206, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6364369549208913, - "sentence_nr": 2 + "score": 0.40562290854898025, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.40562290854898025, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.46696436096182586, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2985280444159845, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5072627289039213, - "sentence_nr": 2 + "score": 0.33546955366063214, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.49402195020645817, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.583821485566765, - "sentence_nr": 2 + "score": 0.40319099863003527, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.22837680015088951, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.44164180234500505, - "sentence_nr": 2 + "score": 0.39618802899930716, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4151474543103342, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.638952468710771, - "sentence_nr": 2 + "score": 0.39858613265631837, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4355097603079957, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6511365998081735, - "sentence_nr": 2 + "score": 0.3818534926571001, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2468185992183292, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.46792167630295967, - "sentence_nr": 2 + "score": 0.3644112480028862, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.37475590201160436, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.549178868228435, - "sentence_nr": 2 + "score": 0.3898236605340541, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.21305368975019265, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4371748197696026, - "sentence_nr": 2 + "score": 0.3128264071150837, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.08919951949408464, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.2986174009048306, - "sentence_nr": 2 + "score": 0.39618802899930716, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4176763688729275, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6124700716856484, - "sentence_nr": 2 + "score": 0.3099872951741207, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.38791552573256816, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5723637874192081, - "sentence_nr": 2 + "score": 0.3099872951741207, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", + "metric": "chrf", + "score": 0.393379300802006, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", + "score": 0.3955832343987641, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.389666294007518, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.30325797018747325, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.3066337968338866, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.3892064098781075, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4305675865000082, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6312508299648723, - "sentence_nr": 2 + "score": 0.3010381621698183, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.3118742681270173, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.393379300802006, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.011973456545827533, - "sentence_nr": 2 + "score": 0.28783297914763095, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3002149853465536, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5378189160780977, - "sentence_nr": 2 + "score": 0.3958941272081701, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.11634129390828839, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.31530902302000635, - "sentence_nr": 2 + "score": 0.5042211795038526, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.19544795798162903, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3835451743665027, - "sentence_nr": 2 + "score": 0.4425973012069069, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.19889333501994313, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3885583772632557, - "sentence_nr": 2 + "score": 0.511876122662448, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.24480102898506534, - "sentence_nr": 2 + "score": 0.511876122662448, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.5582360999449585, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.2623806581920467, - "sentence_nr": 2 + "score": 0.4418365362317144, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.20876900081884944, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3981381071356935, - "sentence_nr": 2 + "score": 0.5948724602646328, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.5644198744535891, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.4566163202450332, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.1582263258709324, - "sentence_nr": 2 + "score": 0.5049375875723539, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.4425973012069069, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.608796342723599, + "sentence_nr": 4 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3184011333042053, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.475779589579386, - "sentence_nr": 2 + "score": 0.511876122662448, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.5565173534238104, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3530704022752377, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.521530381948501, - "sentence_nr": 2 + "score": 0.5582360999449585, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", + "score": 0.4915933923809756, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", + "metric": "chrf", + "score": 0.4429196299668147, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.21574854574751035, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "microsoft/phi-4", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.41940323708656974, - "sentence_nr": 2 + "score": 0.4854320703575965, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.058854097785805734, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.17240019222052141, - "sentence_nr": 2 + "score": 0.41469341972645324, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.23425891587078498, - "sentence_nr": 2 + "score": 0.39451521279220947, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.359355103997122, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5589602235417395, - "sentence_nr": 2 + "score": 0.5516607622642397, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4267520229161, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5518115366540288, - "sentence_nr": 2 + "score": 0.47160616105623426, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4248870612387681, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5641041633033193, - "sentence_nr": 2 + "score": 0.5256353512715748, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.40429429626811253, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.575799986766, - "sentence_nr": 2 + "score": 0.3765697091436241, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.2534743707366162, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5971149415305869, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3877240689639599, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5570859361697285, - "sentence_nr": 2 + "score": 0.3431841258656284, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.37917766663411384, - "sentence_nr": 2 + "score": 0.27447938256311044, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5365794450039074, - "sentence_nr": 2 + "score": 0.615291848344044, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.2948993986902436, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5989264158576341, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.2948993986902436, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5989264158576341, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.23329145933277767, - "sentence_nr": 2 + "score": 0.31573558123189943, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.44291475401588093, - "sentence_nr": 2 + "score": 0.6989238098201116, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.12962472880491877, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5750206459290513, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.14296145628396553, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5102054230731641, + "sentence_nr": 4 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.40977628070530747, - "sentence_nr": 2 + "score": 0.33118227522229554, + "sentence_nr": 4 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5623921018498135, - "sentence_nr": 2 + "score": 0.7010244056936935, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.30626379803308257, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5077543267123376, - "sentence_nr": 2 + "score": 0.6245952145297528, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.39963516628793516, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5372822043426468, - "sentence_nr": 2 + "score": 0.6245952145297528, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.38861707449775285, - "sentence_nr": 2 + "score": 0.2680165156355779, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5432656354167995, - "sentence_nr": 2 + "score": 0.5989264158576341, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.2680165156355779, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5989264158576341, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.32594818888335836, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.6263180162489238, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.17706333085447226, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4047932836379997, - "sentence_nr": 2 + "score": 0.4425650919372919, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.30904104300309865, - "sentence_nr": 2 + "score": 0.32594818888335836, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.48677056338263186, - "sentence_nr": 2 + "score": 0.6263180162489238, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.35551034193127495, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5627284645723449, - "sentence_nr": 2 + "score": 0.4576529535952892, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5039752490702457, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.613669501327356, - "sentence_nr": 2 + "score": 0.5309982646782259, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.36932295883897953, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5524455184773474, - "sentence_nr": 2 + "score": 0.4726395749383864, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.080331199191236, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.23021641289829473, - "sentence_nr": 2 + "score": 0.40052428191473877, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.4214137028076755, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3297358210077752, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.506224157487821, - "sentence_nr": 2 + "score": 0.2630268050449769, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3221305290185444, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4866081657424789, - "sentence_nr": 2 + "score": 0.33762297226992255, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.4279680255777309, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.4179740300979212, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.10563809356628297, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.2323275601638909, - "sentence_nr": 2 + "score": 0.39336600752225864, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.399524781060505, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3811737094308541, + "sentence_nr": 4 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4592557039164775, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6121399518555048, - "sentence_nr": 2 + "score": 0.5189004396088754, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.32094538365835024, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.37818447598700816, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5516941276443429, - "sentence_nr": 2 + "score": 0.4101715667811344, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", + "score": 0.45834841871997833, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", + "metric": "chrf", + "score": 0.4498692000973136, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.34591973979258805, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "microsoft/phi-4", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5197016245837053, - "sentence_nr": 2 + "score": 0.4568577003687423, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.10020997712284248, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.16327778043310373, - "sentence_nr": 2 + "score": 0.31754227193241025, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.24470192769722524, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4583472827584427, - "sentence_nr": 2 + "score": 0.3974726419025883, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5088535943352446, - "sentence_nr": 2 + "score": 0.24601372576927547, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.625202596789752, - "sentence_nr": 2 + "score": 0.6374693500772332, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.562048819850726, - "sentence_nr": 2 + "score": 0.1892240568795935, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7192054483864224, - "sentence_nr": 2 + "score": 0.6151179643430991, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5550041554031738, - "sentence_nr": 2 + "score": 0.24601372576927547, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6554946147279708, - "sentence_nr": 2 + "score": 0.686947433675709, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4406896260480816, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.571328063702761, - "sentence_nr": 2 + "score": 0.4746119151171374, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.6077585258730265, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7400152006566423, - "sentence_nr": 2 + "score": 0.5833743057382133, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.6260375038358343, - "sentence_nr": 2 + "score": 0.195647514979229, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7803415401430737, - "sentence_nr": 2 + "score": 0.5832256253964303, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2961648173595504, - "sentence_nr": 2 + "score": 0.2658483576665877, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5022745285039809, - "sentence_nr": 2 + "score": 0.6410540990527072, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5803184114968359, - "sentence_nr": 2 + "score": 0.23578316044531808, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.759870828515734, - "sentence_nr": 2 + "score": 0.6511343054546453, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.37446819995007063, - "sentence_nr": 2 + "score": 0.23578316044531808, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5965995710194948, - "sentence_nr": 2 + "score": 0.6511343054546453, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.41110950985436373, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6710923400142267, - "sentence_nr": 2 + "score": 0.5639241776831634, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.46160726531733565, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.25336549464486463, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.6437859092065046, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.4513165758172509, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.7514771576902608, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.23578316044531808, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.6511343054546453, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5281061979991509, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.47237086893932345, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6521003933528818, - "sentence_nr": 2 + "score": 0.5554602680850725, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.17181529671327242, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.6053635787005981, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.6520065459729374, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.31867018346252723, - "sentence_nr": 2 + "score": 0.41291750111233794, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5261433842307197, - "sentence_nr": 2 + "score": 0.17181529671327242, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.709255033821849, - "sentence_nr": 2 + "score": 0.6053635787005981, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3941975148525721, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5191046479503385, - "sentence_nr": 2 + "score": 0.46426595961938383, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.262633940062176, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.41923206553744197, - "sentence_nr": 2 + "score": 0.41238100267720657, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3850172427136058, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5264633431241114, - "sentence_nr": 2 + "score": 0.41238100267720657, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.15363234192450648, - "sentence_nr": 2 + "score": 0.4806367958084579, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.43269692060361126, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.251174652769276, - "sentence_nr": 2 + "score": 0.5066311799500233, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3146726146646545, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4709531555683, - "sentence_nr": 2 + "score": 0.6562641136790542, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.4494592020783298, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.45393803242035113, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.13978782442553714, - "sentence_nr": 2 + "score": 0.5013632657267051, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.44006989470956354, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.45746339332244207, + "sentence_nr": 4 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.1898569805320716, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3803223837566382, - "sentence_nr": 2 + "score": 0.49118110181149666, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.49118110181149666, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3223419048219805, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5205977846006183, - "sentence_nr": 2 + "score": 0.4224991954993499, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", + "score": 0.5379068753129642, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", + "metric": "chrf", + "score": 0.5379068753129642, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.1764046491640527, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "microsoft/phi-4", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3464061249457313, - "sentence_nr": 2 + "score": 0.44493884815976026, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.18856799944599728, - "sentence_nr": 2 + "score": 0.3348758882377771, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.20466701735848536, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3025868321081519, - "sentence_nr": 2 + "score": 0.4601349893675622, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2618161850312308, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.46946589430056646, - "sentence_nr": 2 + "score": 0.410846945789476, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4395347891601966, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6124294442602769, - "sentence_nr": 2 + "score": 0.39909989628767284, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.33711507396378565, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.568383173179082, - "sentence_nr": 2 + "score": 0.39913709020460375, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3222423455530638, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5643748237802169, - "sentence_nr": 2 + "score": 0.40443357144012176, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.39909989628767284, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.40332947519159895, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6228862504867423, - "sentence_nr": 2 + "score": 0.4354398635855642, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.44898438516407524, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6143783254714975, - "sentence_nr": 2 + "score": 0.5705717737418762, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.40443357144012176, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.40443357144012176, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3688091032179454, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5618587771651018, - "sentence_nr": 2 + "score": 0.4121946181418776, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.40443357144012176, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.4219530220129113, + "sentence_nr": 4 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.43155890347066467, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.619009447565164, - "sentence_nr": 2 + "score": 0.39909989628767284, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4918904748281632, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6551168488426827, - "sentence_nr": 2 + "score": 0.41663636213344807, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.44411177326152307, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6264654386006935, - "sentence_nr": 2 + "score": 0.42422145417131013, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.1456085160245154, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4152567008092657, - "sentence_nr": 2 + "score": 0.5606044053771457, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.4276677753816098, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.40706217940224826, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.30284835181827113, - "sentence_nr": 2 + "score": 0.40443357144012176, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3991070933698779, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5925781167136664, - "sentence_nr": 2 + "score": 0.4121946181418776, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.13139413594401378, - "sentence_nr": 2 + "score": 0.3113878808075066, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.2490406851204271, - "sentence_nr": 2 + "score": 0.6758978744760765, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3112317271723676, - "sentence_nr": 2 + "score": 0.17181529671327242, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4361597730424806, - "sentence_nr": 2 + "score": 0.5293474685884572, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.36717349445307196, - "sentence_nr": 2 + "score": 0.23578316044531808, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4938275160496472, - "sentence_nr": 2 + "score": 0.5821373704411671, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.32679491753274487, - "sentence_nr": 2 + "score": 0.14528679532351443, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5061357551531296, - "sentence_nr": 2 + "score": 0.514952316880994, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.22894156860669912, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.6314372536561444, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3958704329397872, - "sentence_nr": 2 + "score": 0.21972813874997157, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5196505496421775, - "sentence_nr": 2 + "score": 0.6022094443409847, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4850978822371748, - "sentence_nr": 2 + "score": 0.3113878808075066, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6352541213631081, - "sentence_nr": 2 + "score": 0.6728506998168392, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.21305413619585087, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.6284808905153216, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.21305413619585087, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.6284808905153216, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.1597896899620504, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.49086962788909555, - "sentence_nr": 2 + "score": 0.3128496839849598, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.1487964117124549, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.421318296293088, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.21899108456429012, + "sentence_nr": 4 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5117916534946495, - "sentence_nr": 2 + "score": 0.5773502691896258, + "sentence_nr": 4 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6878657354957926, - "sentence_nr": 2 + "score": 0.7999099314029202, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.5323469509459265, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5420104447784688, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6785083781968722, - "sentence_nr": 2 + "score": 0.6371798394308665, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", + "score": 0.6423124418413864, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", + "metric": "chrf", + "score": 0.6423124418413864, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_from", "metric": "bleu", - "score": 0.36033300506928556, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "microsoft/phi-4", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5012711439020835, - "sentence_nr": 2 + "score": 0.470365760449425, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.1975941306622024, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.27520597189594015, - "sentence_nr": 2 + "score": 0.2970314818988727, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2983588344542972, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5041326432925124, - "sentence_nr": 2 + "score": 0.3292499962917628, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.39997687282627975, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5906362815628093, - "sentence_nr": 2 + "score": 0.4410492519530161, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5208833700498166, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6368157603637512, - "sentence_nr": 2 + "score": 0.4429196299668147, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5248317090186142, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6599904808886127, - "sentence_nr": 2 + "score": 0.4425973012069069, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3727105527986878, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5475770179024447, - "sentence_nr": 2 + "score": 0.47465074831919213, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5354397296450966, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6627191681525589, - "sentence_nr": 2 + "score": 0.4429196299668147, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.49612267717096975, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6218353723304708, - "sentence_nr": 2 + "score": 0.4425973012069069, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.425433767253164, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5818873909634904, - "sentence_nr": 2 + "score": 0.4425973012069069, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.674363352915248, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7903753469468177, - "sentence_nr": 2 + "score": 0.4425973012069069, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5781534325005774, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.744306267545875, - "sentence_nr": 2 + "score": 0.4425973012069069, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5567668766994894, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7126872503504181, - "sentence_nr": 2 + "score": 0.4425973012069069, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-saba", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5809669142768361, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-saba", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7168714328499641, - "sentence_nr": 2 + "score": 0.44024010151528153, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "mistralai/mistral-nemo", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.21025696416672812, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.4550016613145898, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.4440931655950853, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.44830378475308, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.47465074831919213, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.44830378475308, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.49360149005422826, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.44667985032715174, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.39080193524562357, - "sentence_nr": 2 + "score": 0.2623399284064729, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.43929751176084064, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5889147220911218, - "sentence_nr": 2 + "score": 0.4373156210032521, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.1743988338080954, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4684683280769817, - "sentence_nr": 2 + "score": 0.3844263765000694, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3335763231736967, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4822714438205533, - "sentence_nr": 2 + "score": 0.3830425592586042, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.24628203589181794, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.47490747232624714, - "sentence_nr": 2 + "score": 0.3805770883173698, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2865635502271963, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.43207538722163397, - "sentence_nr": 2 + "score": 0.3830425592586042, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.3830425592586042, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.1148153812028893, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.20549791555765032, - "sentence_nr": 2 + "score": 0.40508457369784023, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3501847839621347, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5180344374850399, - "sentence_nr": 2 + "score": 0.3830425592586042, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.3830425592586042, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.3830425592586042, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.21195371406845798, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4061072764676573, - "sentence_nr": 2 + "score": 0.3830425592586042, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.3830425592586042, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.39818525322365445, + "sentence_nr": 4 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.355480478065782, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5288950976571054, - "sentence_nr": 2 + "score": 0.3830425592586042, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3483718729405163, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4980927055323402, - "sentence_nr": 2 + "score": 0.39818525322365445, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4522982965509296, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5291817462027207, - "sentence_nr": 2 + "score": 0.3844263765000694, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.32488958976180393, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5442618574958996, - "sentence_nr": 2 + "score": 0.3830425592586042, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.3830425592586042, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.3844263765000694, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.11781301843777481, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.20219186255257193, - "sentence_nr": 2 + "score": 0.3830425592586042, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.35111125142401484, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.47360088422177105, - "sentence_nr": 2 + "score": 0.39818525322365445, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.30485765641951534, - "sentence_nr": 2 + "score": 0.23693055763743093, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4770499606054267, - "sentence_nr": 2 + "score": 0.6474126202050918, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.33469420519942356, - "sentence_nr": 2 + "score": 0.1667955161379731, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.40802446160905737, - "sentence_nr": 2 + "score": 0.5802683403568892, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4747315561546192, - "sentence_nr": 2 + "score": 0.1667955161379731, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5626366882998202, - "sentence_nr": 2 + "score": 0.5802683403568892, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 2 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.13084917716157846, - "sentence_nr": 2 + "score": 0.5521590062829653, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.19923405658137924, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.6211036406023237, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.44404782758976735, - "sentence_nr": 2 + "score": 0.23693055763743093, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5596520713104719, - "sentence_nr": 2 + "score": 0.6474126202050918, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.41618377742781326, - "sentence_nr": 2 + "score": 0.1667955161379731, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5456804815374756, - "sentence_nr": 2 + "score": 0.5802683403568892, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.23360210277843085, - "sentence_nr": 2 + "score": 0.18814785746917081, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3931394603219493, - "sentence_nr": 2 + "score": 0.628558096175999, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2977932227461586, - "sentence_nr": 2 + "score": 0.18814785746917081, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.45438903925756086, - "sentence_nr": 2 + "score": 0.628558096175999, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.22256734278223791, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3765773373060594, - "sentence_nr": 2 + "score": 0.6131017059052001, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-saba", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.16910165945030708, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-saba", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.2980085795836986, - "sentence_nr": 2 + "score": 0.5513474668136094, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-nemo", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4575089222077589, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-nemo", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5374740662953226, - "sentence_nr": 2 + "score": 0.5513474668136094, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.05780839041831641, - "sentence_nr": 2 + "score": 0.23693055763743093, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.09190080998528566, - "sentence_nr": 2 + "score": 0.6474126202050918, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.1704823945286264, - "sentence_nr": 2 + "score": 0.23693055763743093, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.33422996692133256, - "sentence_nr": 2 + "score": 0.6474126202050918, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.8761560783209453, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.9484564543183253, - "sentence_nr": 3 + "score": 0.6131017059052001, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.7505336182671021, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.8401910628269498, - "sentence_nr": 3 + "score": 0.6131017059052001, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.8761560783209453, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.9289416300153619, - "sentence_nr": 3 + "score": 0.6131017059052001, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.4450050658086207, - "sentence_nr": 3 + "score": 0.1667955161379731, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.7558874882119336, - "sentence_nr": 3 + "score": 0.5802683403568892, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "en", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.24688498672025874, - "sentence_nr": 3 + "score": 0.1423071532720465, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "en", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.6300112897041039, - "sentence_nr": 3 + "score": 0.5673078468780355, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.8107492451395732, - "sentence_nr": 3 + "score": 0.19923405658137924, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.900032747778274, - "sentence_nr": 3 + "score": 0.6211036406023237, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.8761560783209453, - "sentence_nr": 3 + "score": 0.3446073377034663, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.9484564543183253, - "sentence_nr": 3 + "score": 0.7621696379946562, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 + "score": 0.3237722713145643, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.7426638026175545, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.8761560783209453, - "sentence_nr": 3 + "score": 0.37030468338190614, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.9484564543183253, - "sentence_nr": 3 + "score": 0.7587397825317436, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.545311114945696, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.7406375008540003, - "sentence_nr": 3 + "score": 0.3392375549949802, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.9160988509714175, - "sentence_nr": 3 + "score": 0.7348936586421482, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.14025775160081475, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.3132252321342574, - "sentence_nr": 3 + "score": 0.5380752589178658, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.35025412310639736, - "sentence_nr": 3 + "score": 0.4637878319059324, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.6825372617659788, - "sentence_nr": 3 + "score": 0.6919476196061328, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.13453927150397377, - "sentence_nr": 3 + "score": 0.17694975149532557, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.10522974272748564, - "sentence_nr": 3 + "score": 0.5437152782473114, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.22055493694673897, - "sentence_nr": 3 + "score": 0.1457684614972261, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.3931965048763613, - "sentence_nr": 3 + "score": 0.5633850959085152, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.13755274871304535, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.10397715306705207, - "sentence_nr": 3 + "score": 0.6120635842558794, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.13899941210887606, - "sentence_nr": 3 + "score": 0.5303624596095554, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.10947303419437356, - "sentence_nr": 3 + "score": 0.7835371347721495, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.09122941759796505, - "sentence_nr": 3 + "score": 0.3969996397250977, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.13725861056573663, - "sentence_nr": 3 + "score": 0.7487402156832422, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.11147384852362276, - "sentence_nr": 3 + "score": 0.9260013112796359, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.1350501875730652, - "sentence_nr": 3 + "score": 0.37030468338190614, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.11147384852362276, - "sentence_nr": 3 + "score": 0.7587397825317436, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.07933317425857943, - "sentence_nr": 3 + "score": 0.5658596262915636, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.1457684614972261, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.09408024740752835, - "sentence_nr": 3 + "score": 0.5701800421590155, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.37030468338190614, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.054674609450212665, - "sentence_nr": 3 + "score": 0.7587397825317436, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.18207052811092136, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.09895358918308976, - "sentence_nr": 3 + "score": 0.5476571078959771, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.1711057433668069, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.11538184104597694, - "sentence_nr": 3 + "score": 0.1531682455208201, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.09204268041910899, - "sentence_nr": 3 + "score": 0.6009917293478183, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.6885326214539055, - "sentence_nr": 3 + "score": 0.25947507140745757, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.8229812189228393, - "sentence_nr": 3 + "score": 0.6659437947666702, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", - "metric": "bleu", - "score": 0.839587623092576, - "sentence_nr": 3 + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.9096086668952811, - "sentence_nr": 3 + "score": 0.49342175914364256, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.7267072830982378, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.8396959977515368, - "sentence_nr": 3 + "score": 0.4754189767029448, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.6374950652411382, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.6643984252563968, - "sentence_nr": 3 + "score": 0.46847165370535515, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.42156430044473914, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.760856626273165, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.881580297011256, - "sentence_nr": 3 + "score": 0.5098954559839274, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.839587623092576, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.9096086668952811, - "sentence_nr": 3 + "score": 0.13369377363079382, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5417128748379603, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5417128748379603, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.5821747317554493, - "sentence_nr": 3 + "score": 0.2784899880299974, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.7539119883011114, - "sentence_nr": 3 + "score": 0.6722683601585776, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4828085350127393, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3771310857613967, + "sentence_nr": 4 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.5824621545691198, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.6375144448777752, - "sentence_nr": 3 + "score": 0.6062665280777744, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.38053710786825434, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.7011240864777649, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.6885326214539055, - "sentence_nr": 3 + "score": 0.293597382795084, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.8328652216139806, - "sentence_nr": 3 + "score": 0.6760199025405591, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.27571859863660825, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", + "score": 0.743408011301782, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.6282512191941445, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.839587623092576, - "sentence_nr": 3 + "score": 0.14253911354181859, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.9096086668952811, - "sentence_nr": 3 + "score": 0.5429166339438998, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.251696695878184, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.5710821658681214, - "sentence_nr": 3 + "score": 0.4088276333455685, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.7498810286408993, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.7886148242134857, - "sentence_nr": 3 + "score": 0.4745035227847713, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.3480442076026084, - "sentence_nr": 3 + "score": 0.195647514979229, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.6142483232997242, - "sentence_nr": 3 + "score": 0.5725643788499303, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.2861853478258715, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.6401604432917332, - "sentence_nr": 3 + "score": 0.4352628824108997, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.2861853478258715, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.6401604432917332, - "sentence_nr": 3 + "score": 0.3620843366588185, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.2852636439147137, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.6732018003142922, - "sentence_nr": 3 + "score": 0.3550428472545064, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.26529518334824453, - "sentence_nr": 3 + "score": 0.18842393723950338, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.596699960316198, - "sentence_nr": 3 + "score": 0.5854975500881314, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.412295470431275, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.705800771033924, - "sentence_nr": 3 + "score": 0.3870105243407916, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.2104783778565715, - "sentence_nr": 3 + "score": 0.18842393723950338, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.6117499551501043, - "sentence_nr": 3 + "score": 0.5854975500881314, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.4747354911173249, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.7360286800047513, - "sentence_nr": 3 + "score": 0.49833704863382544, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.412295470431275, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.705800771033924, - "sentence_nr": 3 + "score": 0.49833704863382544, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.18814785746917081, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.5692328972915052, - "sentence_nr": 3 + "score": 0.327643461630417, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.589811312024197, - "sentence_nr": 3 + "score": 0.29010990017245786, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.31072931460421827, - "sentence_nr": 3 + "score": 0.27366048131065474, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.20149416157064579, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.4306285422638574, - "sentence_nr": 3 + "score": 0.5911912768187473, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.17401517708317762, - "sentence_nr": 3 + "score": 0.20149416157064579, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.45006261596496794, - "sentence_nr": 3 + "score": 0.5931139543094289, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.151240443751577, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.4224869587588239, - "sentence_nr": 3 + "score": 0.3782180895945298, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.151240443751577, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.4224869587588239, - "sentence_nr": 3 + "score": 0.507081939944787, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.26860011657329247, - "sentence_nr": 3 + "score": 0.5076904123107298, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.1435622311718879, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.45714671497681403, - "sentence_nr": 3 + "score": 0.3643413837012786, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.14172292406325543, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.4762857001428092, - "sentence_nr": 3 + "score": 0.16319679661526076, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.4800955244005148, - "sentence_nr": 3 + "score": 0.3474927845768493, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.18482936243672016, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.5172215726655364, - "sentence_nr": 3 + "score": 0.5753050684342109, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.16234676720992364, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.48137970077362496, - "sentence_nr": 3 + "score": 0.5116862201536014, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.22131477988685871, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.6188310784475567, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.16853790965501372, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.5242065098084487, - "sentence_nr": 3 + "score": 0.49289897908980135, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.07810235385630719, - "sentence_nr": 3 + "score": 0.5769327326081999, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.10401577613691954, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.32252336426814965, - "sentence_nr": 3 + "score": 0.6248961527161889, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.2028736642487601, - "sentence_nr": 3 + "score": 0.22179945921983923, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.3614856639698008, - "sentence_nr": 3 + "score": 0.6249971903914197, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.2255489037266197, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.3954925749722234, - "sentence_nr": 3 + "score": 0.44548434872692594, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.2927057121559396, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.4330945753016968, - "sentence_nr": 3 + "score": 0.448224857029718, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.18038302998635977, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.320678468026793, - "sentence_nr": 3 + "score": 0.514391848002756, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.3124325727595954, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.4415757258745415, - "sentence_nr": 3 + "score": 0.4519657987381455, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.21850594525107195, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.4049269026117245, - "sentence_nr": 3 + "score": 0.5327944201340103, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.21233470585998818, - "sentence_nr": 3 + "score": 0.22179945921983923, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.3630016390465325, - "sentence_nr": 3 + "score": 0.6249971903914197, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.21850594525107195, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.4173980390626746, - "sentence_nr": 3 + "score": 0.5141209056236068, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.22325877055095214, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.38760873730223866, - "sentence_nr": 3 + "score": 0.5121650809135759, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", + "score": 0.6248961527161889, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5216428635440964, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.2741229265391949, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.3721657350281369, - "sentence_nr": 3 + "score": 0.4564432184821405, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.16170596160446446, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.33805023952655533, - "sentence_nr": 3 + "score": 0.3555531255203411, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.23398197530631124, - "sentence_nr": 3 + "score": 0.5116862201536014, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.2929807168354841, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.5975595069845072, - "sentence_nr": 3 + "score": 0.520472515533923, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.6689604664235209, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.8010329764520807, - "sentence_nr": 3 + "score": 0.33471616336068044, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.6689604664235209, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.8010329764520807, - "sentence_nr": 3 + "score": 0.3198143076622585, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.14908960803395838, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.4761746966391582, - "sentence_nr": 3 + "score": 0.25944035160413503, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.1935951733925871, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.43667702869251973, - "sentence_nr": 3 + "score": 0.2975936119966016, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.2466674257522263, - "sentence_nr": 3 + "score": 0.33494612818381275, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.5677534942306638, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.684329671666446, - "sentence_nr": 3 + "score": 0.33494612818381275, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.8787419089273848, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.9422733087334002, - "sentence_nr": 3 + "score": 0.23219780504948684, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.6666935927206881, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.7886059879769752, - "sentence_nr": 3 + "score": 0.23219780504948684, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.38085857828188696, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.3854501214118697, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.595779023757305, - "sentence_nr": 3 + "score": 0.36702287443308856, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.1999934463074552, - "sentence_nr": 3 + "score": 0.37353268221130526, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.30520457148036917, - "sentence_nr": 3 + "score": 0.5262688121999385, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.4113125177363443, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.42808075762838727, - "sentence_nr": 3 + "score": 0.522087506283104, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.32685141385924577, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.3758692873615971, - "sentence_nr": 3 + "score": 0.38637605442040596, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.3734832062562986, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.383916695249631, - "sentence_nr": 3 + "score": 0.4054892447711709, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.3804672236690253, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.433708341935832, - "sentence_nr": 3 + "score": 0.3835470698559142, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.3734832062562986, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.44543323722408085, - "sentence_nr": 3 + "score": 0.33540675877467946, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.4678134833959513, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.5051480556620123, - "sentence_nr": 3 + "score": 0.172700810315234, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.4028998029112093, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.43422338821405304, - "sentence_nr": 3 + "score": 0.33494612818381275, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.5894159589207006, - "sentence_nr": 3 + "score": 0.2961516536011624, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.633437763335489, - "sentence_nr": 3 + "score": 0.7355780986981637, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.46360731056064436, - "sentence_nr": 3 + "score": 0.2865612242047131, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.5726015901952585, - "sentence_nr": 3 + "score": 0.6433813179203622, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.26518122980477765, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.6541173886447416, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.3734832062562986, - "sentence_nr": 3 + "score": 0.26220676436185975, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.43771936994910393, - "sentence_nr": 3 + "score": 0.646323175287155, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 + "score": 0.2722589423069702, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.2041405149858879, - "sentence_nr": 3 + "score": 0.658571547163188, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.2728627798814474, - "sentence_nr": 3 + "score": 0.17694975149532557, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.36592034784584504, - "sentence_nr": 3 + "score": 0.5836240065197391, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.6237774736059616, - "sentence_nr": 3 + "score": 0.28977907494497107, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.8500131524897436, - "sentence_nr": 3 + "score": 0.6663117339552681, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.573764722928549, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.817979859532479, - "sentence_nr": 3 + "score": 0.48426682761561596, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.2320305803246989, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.6224956012824276, - "sentence_nr": 3 + "score": 0.48426682761561596, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.21449459478473423, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.6528501353073614, - "sentence_nr": 3 + "score": 0.6245566175148537, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.26697411956933875, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.6328956554106696, - "sentence_nr": 3 + "score": 0.5617057003908478, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.27341185048222727, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.6411651849711889, - "sentence_nr": 3 + "score": 0.49912699124761417, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.4041187386794465, - "sentence_nr": 3 + "score": 0.25336549464486463, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.6888233111124319, - "sentence_nr": 3 + "score": 0.6547636046715577, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.670001214025099, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.876386276114813, - "sentence_nr": 3 + "score": 0.6309937233948154, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.5645815242299279, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.8151453923340255, - "sentence_nr": 3 + "score": 0.6245566175148537, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", + "score": 0.6285229436299309, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.6285229436299309, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.5972046851135996, - "sentence_nr": 3 + "score": 0.4182671264424632, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.24570408832734913, - "sentence_nr": 3 + "score": 0.29254488484029956, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.17470942957770763, - "sentence_nr": 3 + "score": 0.3040559696901293, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.5699365673055954, - "sentence_nr": 3 + "score": 0.6658994383739726, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.8482942955247808, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.9256238040654331, - "sentence_nr": 3 + "score": 0.43620605921972144, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.3598792258309727, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.3258323649865128, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", + "score": 0.3209186108619747, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5002716530045863, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.5002716530045863, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.4276859054768592, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5002716530045863, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5002716530045863, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.45100597619813854, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.44158982072668623, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.39176422032430464, + "sentence_nr": 4 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.6245566175148537, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5211563282115517, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.8363600587440573, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.9912737182609732, - "sentence_nr": 3 + "score": 0.6245566175148537, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", + "score": 0.5459722940273104, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5459722940273104, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.3258323649865128, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.6018154975998465, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.7669980679050217, - "sentence_nr": 3 + "score": 0.22011791783839232, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.8482942955247808, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.9256238040654331, - "sentence_nr": 3 + "score": 0.6212521406814923, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.20164065196183215, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.3923533979663226, - "sentence_nr": 3 + "score": 0.5343147728119615, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.4212555584968603, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.44264089366400194, - "sentence_nr": 3 + "score": 0.5125809225356253, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.2750774388281557, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.34712156908889796, - "sentence_nr": 3 + "score": 0.5125809225356253, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.31322885062380607, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.3821582738832969, - "sentence_nr": 3 + "score": 0.511876122662448, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.47825370157575003, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.21171273476282318, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.34680801952866847, - "sentence_nr": 3 + "score": 0.5059916132562515, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.27483211854002193, - "sentence_nr": 3 + "score": 0.4813598669606701, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.3332411632883488, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.4039853784752083, - "sentence_nr": 3 + "score": 0.4813598669606701, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.4081354056739722, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.47955174657690236, - "sentence_nr": 3 + "score": 0.4813598669606701, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.392022469660947, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.48059504328652813, - "sentence_nr": 3 + "score": 0.47825370157575003, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.511876122662448, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.4081354056739722, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.4731088237118851, - "sentence_nr": 3 + "score": 0.4935153102946312, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.20642179908801722, - "sentence_nr": 3 + "score": 0.47825370157575003, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.19178500195247952, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.3514233824120371, - "sentence_nr": 3 + "score": 0.4935153102946312, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.3471790743028735, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.5354826964964929, - "sentence_nr": 3 + "score": 0.4935153102946312, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.14528679532351443, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.45626264176882697, - "sentence_nr": 3 + "score": 0.47825370157575003, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.21146239923180532, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.3672220683588613, - "sentence_nr": 3 + "score": 0.47825370157575003, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.11365352023191169, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.5427832684043266, - "sentence_nr": 3 + "score": 0.511876122662448, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.0993195473228234, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.4660581946805371, - "sentence_nr": 3 + "score": 0.4646331830467803, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.25034600728678114, - "sentence_nr": 3 + "score": 0.5063020142455625, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.14025775160081475, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.37306669253790053, - "sentence_nr": 3 + "score": 0.6245566175148537, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.866397551781362, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.9369949537059603, - "sentence_nr": 3 + "score": 0.5539920925426138, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.41682189465797687, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.7155178722189985, - "sentence_nr": 3 + "score": 0.5539920925426138, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.5633410521280906, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.37420316460821246, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.6713980677832108, - "sentence_nr": 3 + "score": 0.5963099883424426, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.09520646862489263, - "sentence_nr": 3 + "score": 0.5963099883424426, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.4231985179035766, - "sentence_nr": 3 + "score": 0.5963099883424426, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.47631009147745074, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.6668706097750393, - "sentence_nr": 3 + "score": 0.5963099883424426, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.3312076918041707, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.5907489769215882, - "sentence_nr": 3 + "score": 0.5963099883424426, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.5419642316694008, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.8375813242343603, - "sentence_nr": 3 + "score": 0.6265140753983048, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.45069082245075975, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.6399673599980337, - "sentence_nr": 3 + "score": 0.5138477284777235, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "id", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.37251337991409605, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "id", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.5810872572798261, - "sentence_nr": 3 + "score": 0.6265140753983048, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.19496249079519765, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.3404780052299898, - "sentence_nr": 3 + "score": 0.6245566175148537, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.648844691127488, - "sentence_nr": 3 + "score": 0.6245566175148537, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.33084780351073634, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.5879159712556987, - "sentence_nr": 3 + "score": 0.6265140753983048, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.7708186875078075, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.8284786787443572, - "sentence_nr": 3 + "score": 0.6265140753983048, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.6265140753983048, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.08603520723426224, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.41785071883461133, - "sentence_nr": 3 + "score": 0.5539920925426138, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.175658807429611, - "sentence_nr": 3 + "score": 0.27979820860022203, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.47157573411560544, - "sentence_nr": 3 + "score": 0.6056889168578378, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.20217803037339238, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.4743589083194767, - "sentence_nr": 3 + "score": 0.5326397959358325, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.8492326635760689, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.9048530940348648, - "sentence_nr": 3 + "score": 0.5226572946586268, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.8492326635760689, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.9226314544302758, - "sentence_nr": 3 + "score": 0.5226572946586268, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.4267826722481737, - "sentence_nr": 3 + "score": 0.5210387656594864, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.5309753107573227, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.18331704949485053, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.4681674930025697, - "sentence_nr": 3 + "score": 0.4671158417621693, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.1481394578697113, - "sentence_nr": 3 + "score": 0.2102369368326755, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.4346232049071254, - "sentence_nr": 3 + "score": 0.5768887726639784, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.26104909033290696, - "sentence_nr": 3 + "score": 0.195647514979229, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.5956068369645927, - "sentence_nr": 3 + "score": 0.5857714957546027, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.4546852631699836, - "sentence_nr": 3 + "score": 0.195647514979229, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.7654091839521726, - "sentence_nr": 3 + "score": 0.5857714957546027, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.8492326635760689, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.9226314544302758, - "sentence_nr": 3 + "score": 0.48085787079671877, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.5309753107573227, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.1481394578697113, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.4278722851826281, - "sentence_nr": 3 + "score": 0.3859677866958932, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.10008350737965103, - "sentence_nr": 3 + "score": 0.2102369368326755, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.3399171525144127, - "sentence_nr": 3 + "score": 0.5768887726639784, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.3267507236200133, - "sentence_nr": 3 + "score": 0.4824140720031325, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.14710052131359536, - "sentence_nr": 3 + "score": 0.2102369368326755, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.2322531742374544, - "sentence_nr": 3 + "score": 0.5876589559170592, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.19427446513842178, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.2821379316874468, - "sentence_nr": 3 + "score": 0.5309753107573227, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.30793937214153166, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.39271105518755994, - "sentence_nr": 3 + "score": 0.5309753107573227, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.1928576545653753, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.27080894796384963, - "sentence_nr": 3 + "score": 0.5309753107573227, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.21800193956058223, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.2974641182469979, - "sentence_nr": 3 + "score": 0.4814496481274858, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.20828838183973028, - "sentence_nr": 3 + "score": 0.195647514979229, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.3002418280717453, - "sentence_nr": 3 + "score": 0.5857714957546027, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.2234473632117264, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.3517185856118227, - "sentence_nr": 3 + "score": 0.5073395824633415, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.25306188056493334, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.3181932375842872, - "sentence_nr": 3 + "score": 0.5073395824633415, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.20536337741589905, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.23621003955621192, - "sentence_nr": 3 + "score": 0.5054194884603328, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.49233042976388086, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.17004486924634224, - "sentence_nr": 3 + "score": 0.4877445613866086, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.14102929105825548, - "sentence_nr": 3 + "score": 0.4903188428039103, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.16702356077048272, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.32184315197242147, - "sentence_nr": 3 + "score": 0.4877445613866086, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 1.0, + "score": 0.0, "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 1.0, + "score": 0.48911069552546027, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 1.0, + "score": 0.0, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 1.0, + "score": 0.47179783140068143, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 1.0, + "score": 0.0, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 1.0, + "score": 0.6171472238624475, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.2246029757863831, + "score": 0.4197983611654241, "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "en", - "task": "translation_to", - "metric": "bleu", - "score": 1.0, + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 1.0, + "score": 0.3993213727535769, "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.8003203203844999, + "score": 0.0, "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.9453478043428296, + "score": 0.5057748632231697, "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.8003203203844999, + "score": 0.0, "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.9453478043428296, + "score": 0.5057748632231697, "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 1.0, + "score": 0.0, "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 1.0, + "score": 0.5079994737492071, "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.37709297891717664, + "score": 0.0, "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.6881502501430368, + "score": 0.4877445613866086, "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0, + "score": 0.0, "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.0, + "score": 0.03502843507733942, "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 1.0, + "score": 0.0, "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 1.0, + "score": 0.5057748632231697, "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 1.0, + "score": 0.0, "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 1.0, + "score": 0.31122692088261866, "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.8003203203844999, + "score": 0.0, "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.9453478043428296, + "score": 0.5362380779055197, "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.4765874091118851, + "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.45911557772276623, + "score": 0.33709347944719925, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.45022125383821326, + "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.46874267375238576, + "score": 0.29382595610734974, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.6350593429017282, + "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.6022395694696409, + "score": 0.31966312198190094, "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.3407065041529668, + "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.3482814151315599, + "score": 0.3095023687399762, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.33821252076454705, "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.6219720158712322, + "score": 0.0, "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.638758039725182, + "score": 0.33709347944719925, "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.7215691881328408, + "score": 0.0, "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.735100789804592, + "score": 0.4148097947848928, "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.700487718300918, + "score": 0.0, "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.7205373993220106, + "score": 0.4216795433274884, "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.611843760819802, + "score": 0.0, "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.6194911484836914, + "score": 0.4216795433274884, "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.5894567062209923, + "score": 0.0, "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.6051783687131701, + "score": 0.39257815659454015, "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.5614660831213585, + "score": 0.0, "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.5781117871636209, + "score": 0.4148097947848928, "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.5907010930652489, + "score": 0.0, "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.5995581839975431, + "score": 0.40151087965388177, "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.33500599401126563, + "score": 0.0, "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.34371117385240735, + "score": 0.33995727836928735, "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.630923553986829, + "score": 0.0, "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.6458808155334796, + "score": 0.4148097947848928, "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.3013901676230198, + "score": 0.4148097947848928, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.38106012955734714, + "score": 0.4148097947848928, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.35187745073108273, + "score": 0.4148097947848928, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.2850647115160651, + "score": 0.40848621046151223, "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.3175465093373464, + "score": 0.31805405607794895, "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.2028736642487601, + "score": 0.0, "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.43458947791319813, + "score": 0.3195891679001926, "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.3222538601891173, "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.4033902612785559, + "score": 0.7495871587703783, "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.13598345546333285, + "score": 0.1667955161379731, "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.4135396704381328, + "score": 0.5773664661124461, "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.1624355752882384, "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.3101159279982649, + "score": 0.6732778877516836, "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0, + "score": 0.3146660996956415, "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.0, + "score": 0.774919653861933, "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.23693055763743093, "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.3909330178955319, + "score": 0.7180407770761651, "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.23693055763743093, "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.217295409663537, + "score": 0.7180407770761651, "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.12173115521158184, + "score": 0.23693055763743093, "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.3904544509639755, + "score": 0.7180407770761651, "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.3820562306791339, + "score": 0.0, "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.5488313413379253, + "score": 0.653546979730166, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.38091370416670794, + "score": 0.0, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.540550443602966, + "score": 0.653546979730166, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.3820562306791339, + "score": 0.0, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.5488313413379253, + "score": 0.45984464012364756, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.36314748337164254, + "score": 0.2722589423069702, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.5392658386159207, + "score": 0.6049790801942001, "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "es", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.2293530951556094, + "score": 0.0, "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "es", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.4689601314620498, + "score": 0.6277082350099422, "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.3820562306791339, + "score": 0.23693055763743093, "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.5488313413379253, + "score": 0.7180407770761651, "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.3820562306791339, + "score": 0.3222538601891173, "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.5488313413379253, + "score": 0.7495871587703783, "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.3820562306791339, + "score": 0.0, "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.5508290063627067, + "score": 0.6277082350099422, "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.35459684529390034, + "score": 0.3222538601891173, "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.5519360558961294, + "score": 0.7495871587703783, "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.23610158425430544, + "score": 0.23693055763743093, "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.46869487580371916, + "score": 0.7180407770761651, "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.35459684529390034, + "score": 0.18842393723950338, "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.5499993547125768, + "score": 0.586796882410113, "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.27062395495883934, + "score": 0.0, "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.4407436716645838, + "score": 0.3005283910333271, "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.3711335186021823, + "score": 0.2467789409989967, "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.5595427509161435, + "score": 0.702397320872287, "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.2294068720558097, + "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.40458364050078693, + "score": 0.30577290788405437, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.32588643749980295, + "score": 0.3053963874050995, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.32003170276441123, + "score": 0.3025031428331747, "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.23088247483586974, + "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.4093450185788297, + "score": 0.3038662664425978, "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.23274285105688466, + "score": 0.0, "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.4091360043642453, + "score": 0.3463770747306081, "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.23189835231884592, + "score": 0.0, "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.44157797833899437, + "score": 0.3323684721255328, "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.11917756990194882, + "score": 0.0, "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.37850093315889116, + "score": 0.3437729074300146, "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.23251355381714656, + "score": 0.0, "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.42986991593991275, + "score": 0.2984899750912363, "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.3330732444230803, + "score": 0.2984899750912363, "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.2550184675066243, + "score": 0.0, "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.4392529322675216, + "score": 0.2982854478221892, "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.23721317187079113, + "score": 0.0, "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.4319465813689286, + "score": 0.3038662664425978, "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.29417113956364643, + "score": 0.0, "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.4653698220842079, + "score": 0.3758304027238242, "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.23088247483586974, + "score": 0.0, "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.42828303349678104, + "score": 0.342927504672789, "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.45237912327122276, + "score": 0.0, "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.6299071573751139, + "score": 0.33856457279955215, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.23736810439041953, + "score": 0.0, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.4004852416401387, + "score": 0.3421048582632637, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.2916261378761629, + "score": 0.0, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.4106520926894174, + "score": 0.3355517973989557, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.282764733088686, + "score": 0.0, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.3743678965131091, + "score": 0.3355517973989557, "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.15511550090520096, + "score": 0.0, "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.3922184662482167, + "score": 0.3377353924432443, "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.31430120091187586, + "score": 0.0, "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.5013155459452984, + "score": 0.3329758884511854, "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.16451929399933107, + "score": 0.0, "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.2957279302594959, + "score": 0.3437729074300146, "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.10793583834163357, + "score": 0.0, "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.3079497311888636, + "score": 0.4660343508894544, "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.11401282249739858, + "score": 0.0, "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.3117911565455793, + "score": 0.43340932146378, "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0, + "score": 0.0, "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.0, + "score": 0.4169735477570989, "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.34019506273883837, + "score": 0.0, "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.48708558391259515, + "score": 0.43189251696918196, "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.33184166448858593, + "score": 0.40242515214425184, "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.1676136890247661, + "score": 0.0, "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.38914692664434314, + "score": 0.46839189725195784, "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.5605065818946205, + "score": 0.14962848372546667, "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.586853267829013, + "score": 0.5531110803538978, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.36300296341860155, + "score": 0.0, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.5816676674074003, + "score": 0.48673932738045633, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.3525399760372503, + "score": 0.0, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.5798116969849163, + "score": 0.48673932738045633, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.3927053212677373, + "score": 0.0, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.5108598154804425, + "score": 0.41934944341336317, "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.32163989714697483, + "score": 0.0, "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.4414323713243047, + "score": 0.3546654565919817, "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.3722001929300059, + "score": 0.0, "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.5252698638532942, + "score": 0.41134678748435993, "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.3514475288270508, + "score": 0.0, "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.5361569875660316, + "score": 0.5026703894468475, "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.20913838136220486, + "score": 0.0, "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.40960094031121963, + "score": 0.44095987534978803, "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.27914759735007616, + "score": 0.0, "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.3958350231734361, + "score": 0.5725197387086665, "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0, + "score": 0.0, "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", + "score": 0.4817075905641231, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.463520790843597, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.20974733068050955, + "score": 0.0, "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.41455868084196934, + "score": 0.4423536939689341, "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.16086531618356015, + "score": 0.0, "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.2797876941198672, + "score": 0.2075521577117978, "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.3277803741755935, + "score": 0.15980518115118317, "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.4267708983045122, + "score": 0.6117406545411793, "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.28765408533715414, + "score": 0.21258844131063828, "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.4046608868073569, + "score": 0.6314891370223008, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.1819722649161304, + "score": 0.15821285888349254, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.44254730215235283, + "score": 0.6605676082065987, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.25291831689404154, + "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.4482360279074225, + "score": 0.5352059639541527, "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.279600269133294, + "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.48172049854477195, + "score": 0.4817621003925206, "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.22872196013470597, + "score": 0.1667955161379731, "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.45089857576633846, + "score": 0.6090533288062945, "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.1258907882951215, + "score": 0.1667955161379731, "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.34143648068854054, + "score": 0.6161305014329985, "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.11986809949741643, + "score": 0.20038908500140973, "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.34629467658248214, + "score": 0.6177327642561014, "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.17550354183836317, + "score": 0.0, "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.3977547521857469, + "score": 0.4199711739851868, "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.32666181171942305, + "score": 0.0, "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.5670457942911707, + "score": 0.4199711739851868, "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0, + "score": 0.18710158230410626, "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.0, + "score": 0.6827304460872186, "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.29175929784144866, + "score": 0.0, "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.45299010750030405, + "score": 0.5106967338510985, "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.24463910693302512, + "score": 0.5084170678613665, "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.21258844131063828, "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.2908660541001102, + "score": 0.6314891370223008, "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.13305199541830684, + "score": 0.21258844131063828, "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.43244987270004115, + "score": 0.6314891370223008, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.3377385620641691, + "score": 0.0, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.5256128450453542, + "score": 0.5220823443002603, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.44776047557667586, + "score": 0.20038908500140973, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.5801193947715436, + "score": 0.6177327642561014, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.2875583820017638, + "score": 0.17976451428761386, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.6087635830564418, + "score": 0.6113954990048472, "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.13836903384315105, + "score": 0.18710158230410626, "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.5516548411073219, + "score": 0.6827304460872186, "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.28295596283263513, + "score": 0.0, "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.6067794553589253, + "score": 0.44210435496259043, "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.20863283213455547, + "score": 0.3807134866446316, "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.5109257435313587, + "score": 0.6734021595321634, "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.34589895849033103, + "score": 0.0, "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.6136307264772042, + "score": 0.5700887051433648, "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.2810551683573811, + "score": 0.0, "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.6477048453606161, + "score": 0.5873831965245108, "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0, + "score": 0.0, "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.0, + "score": 0.47099274965068205, "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.473265686519562, + "score": 0.0, "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.7021422985630228, + "score": 0.41805694116981745, "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.11810019511256618, + "score": 0.0, "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.3708545152745943, + "score": 0.5317734594035952, "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.3423375720396189, + "score": 0.0, "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.5343801172775681, + "score": 0.4843771361283998, "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.12546912767038895, + "score": 0.0, "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.2651343523961406, + "score": 0.5873831965245108, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.12407216162020399, + "score": 0.0, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.2664864612493293, + "score": 0.4566163202450332, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.11261597894135422, + "score": 0.0, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.25234827342962907, + "score": 0.4566163202450332, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.12757855945289526, + "score": 0.0, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.3385577201847465, + "score": 0.3885961889310864, "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.11556653761629153, + "score": 0.0, "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.3140069931838876, + "score": 0.538750255869865, "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.1175771442804648, + "score": 0.0, "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.3103572690939351, + "score": 0.16097255887211387, "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.12629279972753293, + "score": 0.0, "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.2933944065312711, + "score": 0.5719181187428595, "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.1055629358593665, + "score": 0.0, "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.30482671461107386, + "score": 0.5719181187428595, "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.12363251371327445, + "score": 0.0, "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.2692822154793075, + "score": 0.5599655154601001, "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0, + "score": 0.0, "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", + "score": 0.5719181187428595, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.5148079777353672, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.24470651147480013, + "score": 0.6408205679062159, "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.09410612421964877, + "score": 0.16432863675964413, "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.13696035837771334, + "score": 0.0, "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.3335388002918436, + "score": 0.558984948114654, "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.5322520826224556, + "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.727131667480615, + "score": 0.48231853956144055, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.5266403878479265, + "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.6838125749299477, + "score": 0.37314692804855976, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.5276151436342643, + "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.6871514991080862, + "score": 0.4120675260154046, "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.5276151436342643, + "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.6999124430022288, + "score": 0.4669656953948632, "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.5276151436342643, + "score": 0.0, "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.6871514991080862, + "score": 0.4120675260154046, "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.5322520826224556, + "score": 0.0, "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.727131667480615, + "score": 0.4133673303529474, "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.5523722682139371, + "score": 0.0, "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.7471196627888963, + "score": 0.4133673303529474, "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.5274964121279998, + "score": 0.0, "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.6941952618694388, + "score": 0.4120675260154046, "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.5523722682139371, + "score": 0.0, "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.7219229057874782, + "score": 0.4120675260154046, "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.4695966835778606, + "score": 0.0, "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.6515328250192374, + "score": 0.43325089547434603, "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.5002744991426422, + "score": 0.0, "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.6429454824803486, + "score": 0.4599966863788958, "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.40961777715484393, + "score": 0.0, "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.5773184063472755, + "score": 0.4173344185979662, "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.5331034421473965, + "score": 0.0, "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.6806929097519565, + "score": 0.4787742817228935, "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.2092659579124333, + "score": 0.0, "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.5354766759595367, + "score": 0.43325089547434603, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.1284866896836278, + "score": 0.0, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.54959682211865, + "score": 0.43325089547434603, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.3099627272480552, + "score": 0.0, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.5376391724348849, + "score": 0.43325089547434603, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.15742483335373852, + "score": 0.0, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.43519517439687405, + "score": 0.43325089547434603, "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.32705341718250747, + "score": 0.0, "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.631354608587055, + "score": 0.3756494330803333, "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.6144246566045058, + "score": 0.0, "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.7550732449201221, + "score": 0.40580143204058805, "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.2238855010644693, + "score": 0.0, "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.5288881528593262, + "score": 0.4787742817228935, "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.5831654195482086, + "score": 0.1892240568795935, "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.7601444481236787, + "score": 0.5196343731603573, "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.473016146288238, + "score": 0.0, "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.6981990328342826, + "score": 0.4254686256509745, "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0, + "score": 0.0, "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.0, + "score": 0.3088290057043984, "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.5023049672447087, + "score": 0.0, "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.6885140390468562, + "score": 0.28985008910948157, "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.20333448190047887, "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.3649580097673384, + "score": 0.45391985808188723, "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.45686283928900234, + "score": 0.22131477988685871, "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.7163619637625416, + "score": 0.5260789793410334, "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.5703017172567459, + "score": 0.17694975149532557, "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.78509136371851, + "score": 0.4902785344040517, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.5226605904538532, + "score": 0.18616493547741375, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.5272140519221666, + "score": 0.19343014224536145, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.4692685009782657, + "score": 0.19951581244033986, "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "id", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.2680165156355779, + "score": 0.0, "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "id", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.5590529072823445, + "score": 0.16605690899971173, "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.5470973834864862, + "score": 0.15143107566514277, "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.22131477988685871, "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.5563318425026342, + "score": 0.4362962278196088, "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.3200938205435179, + "score": 0.1661742929957894, "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.6393945963053513, + "score": 0.46515980711224425, "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.5478041897913022, + "score": 0.4103552603347404, "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0, + "score": 0.1196655750514248, "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", + "score": 0.43807296710349614, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.449673018038321, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.18882437844970767, + "score": 0.0, "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.5789754712947318, + "score": 0.21326736707612118, "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.43427164452809086, + "score": 0.1790439216234942, "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.526079635392936, + "score": 0.30577290788405437, "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.32001589569502475, + "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.5274343388526991, + "score": 0.4271693186358773, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.31869191523653845, + "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.5973293882694002, + "score": 0.44583799328544693, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.3128418715354195, + "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.5961262622141211, + "score": 0.44562997179553193, "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.302221525161365, + "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "translation_to", - "metric": "chrf", - "score": 0.5926217012511299, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4.1-nano", - "bcp_47": "de", - "task": "translation_to", - "metric": "bleu", - "score": 0.30470915491420003, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4.1-nano", - "bcp_47": "de", - "task": "translation_to", - "metric": "chrf", - "score": 0.542875812219914, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "translation_to", - "metric": "bleu", - "score": 0.289331164128846, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "translation_to", - "metric": "chrf", - "score": 0.5798018459101258, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "translation_to", - "metric": "bleu", - "score": 0.2994985311892038, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "translation_to", - "metric": "chrf", - "score": 0.611702219968759, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", - "task": "translation_to", - "metric": "bleu", - "score": 0.312793730905921, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", - "task": "translation_to", - "metric": "chrf", - "score": 0.4874766693766197, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "translation_to", - "metric": "bleu", - "score": 0.4259108629005092, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.5685406243620383, + "score": 0.394895635806623, "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.41946746288765896, + "score": 0.456896886562563, "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.3162277660168379, + "score": 0.0, "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.533113142157349, + "score": 0.31967472422535054, "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.19037861963633804, + "score": 0.0, "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.5872021106043722, + "score": 0.20793313992045814, "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.289331164128846, + "score": 0.0, "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.5970102524600497, + "score": 0.33030039979994785, "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.6666467303030572, + "score": 0.0, "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.6471929785766445, + "score": 0.33030039979994785, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.23287896954139942, + "score": 0.0, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.34835288582718865, + "score": 0.24831335764102336, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.23287896954139942, + "score": 0.0, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.3456033257148638, + "score": 0.31718447830263197, "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", - "metric": "chrf", "score": 0.0, "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", - "task": "translation_to", - "metric": "bleu", - "score": 0.1845895819969781, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.33578716975410133, + "score": 0.17332651755199632, "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.20038908500140973, + "score": 0.0, "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.38365854681342043, + "score": 0.4334317870334209, "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.15071676257541072, + "score": 0.0, "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.28916309026824916, + "score": 0.4317729593125941, "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.6681082569496674, + "score": 0.0, "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.7257478469803625, + "score": 0.4711738743510459, "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.18605335292758288, + "score": 0.0, "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.3760221461307777, + "score": 0.41527307940938124, "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.1457684614972261, + "score": 0.0, "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.26433094519026357, + "score": 0.3909799362645297, "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.0, + "score": 0.3032776840667523, "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.20515691941627118, + "score": 0.012870012870012871, "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.5398995684986874, + "score": 0.0, "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.5601040209287937, + "score": 0.3452137418984674, "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0.2887138086538547, "sentence_nr": 5 @@ -55094,7 +64371,7 @@ { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", "score": 0.6342291345998248, "sentence_nr": 5 @@ -55102,7 +64379,7 @@ { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 1.0, "sentence_nr": 5 @@ -55110,7 +64387,7 @@ { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", "score": 1.0, "sentence_nr": 5 @@ -55118,7 +64395,7 @@ { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0.7013062757071812, "sentence_nr": 5 @@ -55126,7 +64403,7 @@ { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", "score": 0.9303769449292738, "sentence_nr": 5 @@ -55134,7 +64411,7 @@ { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0.0, "sentence_nr": 5 @@ -55142,15 +64419,31 @@ { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", "score": 0.2381658499765768, "sentence_nr": 5 }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, { "model": "openai/gpt-4.1-nano", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 1.0, "sentence_nr": 5 @@ -55158,7 +64451,7 @@ { "model": "openai/gpt-4.1-nano", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", "score": 1.0, "sentence_nr": 5 @@ -55166,7 +64459,7 @@ { "model": "openai/gpt-4o-mini", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 1.0, "sentence_nr": 5 @@ -55174,7 +64467,39 @@ { "model": "openai/gpt-4o-mini", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", "score": 1.0, "sentence_nr": 5 @@ -55182,7 +64507,7 @@ { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0.8492326635760689, "sentence_nr": 5 @@ -55190,15 +64515,47 @@ { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", "score": 0.9063898435384111, "sentence_nr": 5 }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.5360330312711556, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.7676864590616368, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.5309354663044072, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.6756014232714684, + "sentence_nr": 5 + }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 1.0, "sentence_nr": 5 @@ -55206,15 +64563,31 @@ { "model": "google/gemini-2.5-flash-preview", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", "score": 1.0, "sentence_nr": 5 }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.4529852871970908, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.6941474239078328, + "sentence_nr": 5 + }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0.8522456714074852, "sentence_nr": 5 @@ -55222,47 +64595,63 @@ { "model": "google/gemma-3-27b-it", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", "score": 0.9096914044088521, "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 1.0, "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", "score": 1.0, "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 1.0, + "score": 0.9457416090031758, "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 1.0, + "score": 0.9892952933418456, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.7912619863720214, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.9129058871674676, "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 1.0, "sentence_nr": 5 @@ -55270,7 +64659,7 @@ { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", "score": 1.0, "sentence_nr": 5 @@ -55278,7 +64667,7 @@ { "model": "amazon/nova-micro-v1", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0.9457416090031758, "sentence_nr": 5 @@ -55286,7 +64675,7 @@ { "model": "amazon/nova-micro-v1", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", "score": 0.9892952933418456, "sentence_nr": 5 @@ -55294,807 +64683,1255 @@ { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.37589902061551017, + "score": 0.5087473540251254, "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.42554151277542873, + "score": 0.7647955332172516, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.20748131961458333, + "score": 0.5087473540251254, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.2716205232346228, + "score": 0.7647955332172516, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.20748131961458333, + "score": 0.5087473540251254, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.2716205232346228, + "score": 0.7647955332172516, "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.19272923456045185, + "score": 0.47410002229034043, "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.314589204347422, + "score": 0.7538467008030766, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.5738396574789242, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.798357133373606, "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.31383720140423793, + "score": 0.47410002229034043, "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.35896609082765174, + "score": 0.7538467008030766, "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.20748131961458333, + "score": 0.47410002229034043, "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.2716205232346228, + "score": 0.7538467008030766, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.7538467008030766, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.7538467008030766, "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2562402498959597, + "score": 0.4234885228074744, "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3462132320098601, + "score": 0.7410180114887145, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.7538467008030766, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.47375069012411286, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.7543919667018285, "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.30613574556266654, + "score": 0.47375069012411286, "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.36162356523761796, + "score": 0.7543919667018285, "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.30613574556266654, + "score": 0.5738396574789242, "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.36162356523761796, + "score": 0.798357133373606, "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0, + "score": 0.47410002229034043, "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.0, + "score": 0.7538467008030766, "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.30613574556266654, + "score": 0.47410002229034043, "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.36162356523761796, + "score": 0.7538467008030766, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.7538467008030766, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.7538467008030766, "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.1277700534498365, + "score": 0.4234885228074744, "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.20846991452438368, + "score": 0.7410180114887145, "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.25985341959039815, + "score": 0.47375069012411286, "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3462132320098601, + "score": 0.7543919667018285, "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.42988105429544615, + "score": 0.48181149445310956, "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7577244658187771, + "score": 0.7675828789334244, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5366411241731205, + "score": 0.5091224918749461, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.825566494253596, + "score": 0.7829685247145245, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.617939643800199, + "score": 0.5091224918749461, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.8665162960307256, + "score": 0.7829685247145245, "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4360038791211645, + "score": 0.6626129614342791, "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7669087484597642, + "score": 0.8597893117683423, "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.6153267326643309, + "score": 0.48181149445310956, "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.825440708536967, + "score": 0.7675828789334244, "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4135171000263379, + "score": 0.4441961115027302, "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7050151549073953, + "score": 0.7565542718609186, "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3345794609803645, + "score": 0.0, "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7523344918083558, + "score": 0.2828367156737383, "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.46997395980026974, + "score": 0.37810687864454173, "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.8114935753258365, + "score": 0.741392727905101, "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.46997395980026974, + "score": 0.37916211129533023, "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.8114935753258365, + "score": 0.7363359547185945, "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3751840463233443, + "score": 0.44543578807748957, "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.690216773228096, + "score": 0.7513336773729535, "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-saba", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4547722460981925, + "score": 0.4814564802258215, "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-saba", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.793631811653261, + "score": 0.7621649608882223, "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "mistralai/mistral-nemo", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.3088448141335011, "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "mistralai/mistral-nemo", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.44338575968779337, + "score": 0.6673372115834423, "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.34537865578685034, + "score": 0.48181149445310956, "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6736450219247083, + "score": 0.7675828789334244, "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.4165530720734658, + "score": 0.5091224918749461, "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.7027805129995731, + "score": 0.7829685247145245, "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.4027788021844849, + "score": 0.48181149445310956, "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.6872835607174038, + "score": 0.7675828789334244, "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.40245827940445855, + "score": 0.5461499540157965, "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.6664090181705107, + "score": 0.7954823723658209, "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.44543578807748957, "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.5552412314880962, + "score": 0.7513336773729535, "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "es", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.6976333495952621, + "score": 0.3363225191015943, "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "es", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.8331572107884448, + "score": 0.6803949682739066, "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.6976333495952621, + "score": 0.2404315522172745, "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.8331572107884448, + "score": 0.49155714102395526, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.3477250470582593, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.7188419868243952, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.404727200247809, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6681898017773897, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.40276720463657734, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6529271690805427, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.404727200247809, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6681898017773897, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.404727200247809, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6681898017773897, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.44897710722021167, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6862249089515978, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.44897710722021167, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6862249089515978, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.44897710722021167, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6862249089515978, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.44897710722021167, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6862249089515978, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.44897710722021167, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6862249089515978, "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.40245827940445855, + "score": 0.404727200247809, "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6664090181705107, + "score": 0.6392900613840917, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.404727200247809, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6681898017773897, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.1990581597344524, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.5898277017367236, "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5440766840557734, + "score": 0.44897710722021167, "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7530101164980872, + "score": 0.6862249089515978, "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5440766840557734, + "score": 0.44897710722021167, "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7530101164980872, + "score": 0.6862249089515978, "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.482878209362615, + "score": 0.44897710722021167, "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7054264546871626, + "score": 0.6862249089515978, "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.22447836580911282, + "score": 0.44897710722021167, "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.635962708232662, + "score": 0.6862249089515978, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.404727200247809, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6681898017773897, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.1990581597344524, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6133817758391366, "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3964122180109575, + "score": 0.0, "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.584540734626554, + "score": 0.4386229919587297, "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.6537813760269277, + "score": 0.44897710722021167, "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7617489761353242, + "score": 0.6862249089515978, "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.41307323705325416, + "score": 0.5379348324975908, "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5785653391533346, + "score": 0.7703766110349561, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5248587176134882, + "score": 0.30188353873287377, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6664855309004869, + "score": 0.6086565367747951, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5248587176134882, + "score": 0.22391522968021457, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6664855309004869, + "score": 0.6087618281135659, "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.32797138117025904, + "score": 0.2704091953828695, "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.48645628248697975, + "score": 0.6207272323003366, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.2704091953828695, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.6207272323003366, "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.498704623570665, + "score": 0.2704091953828695, "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6478746389895599, + "score": 0.6207272323003366, "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.498704623570665, + "score": 0.2704091953828695, "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6478746389895599, + "score": 0.6207272323003366, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.2704091953828695, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.6207272323003366, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.2704091953828695, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.6207272323003366, "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2280299254440877, + "score": 0.2704091953828695, "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4447177675003817, + "score": 0.6207272323003366, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.3025029865727436, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.6057244918018514, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.2795351131184338, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.5735871310845391, "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.498704623570665, + "score": 0.2704091953828695, "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6478746389895599, + "score": 0.6207272323003366, "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4549681528678131, + "score": 0.2704091953828695, "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6164314607426773, + "score": 0.6207272323003366, "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.2704091953828695, "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.05173101600908794, + "score": 0.6207272323003366, "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.498704623570665, + "score": 0.2704091953828695, "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6478746389895599, + "score": 0.6207272323003366, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.2704091953828695, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.6207272323003366, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.20776569671187678, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.5495289426480258, "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0.0, "sentence_nr": 5 @@ -56102,207 +65939,319 @@ { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.062313574266204104, + "score": 0.4621757041594117, "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.35012358768277246, + "score": 0.22067731046885494, "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5816664251371266, + "score": 0.5635661737033422, "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.17729842264695017, + "score": 0.5091224918749461, "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4103582047611184, + "score": 0.7829685247145245, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.34895836374229405, + "score": 0.6026286934891149, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4767378358574124, + "score": 0.8025775976044891, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4010889714538991, + "score": 0.6626129614342791, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5642546048162433, + "score": 0.8597893117683423, "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.30145280436636923, + "score": 0.6626129614342791, "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4729753929525169, + "score": 0.8597893117683423, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.48181149445310956, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.7675828789334244, "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2622716439052442, + "score": 0.4727805712999679, "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.42384915893461766, + "score": 0.7717158158167359, "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.35210829264331733, + "score": 0.48181149445310956, "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5239651686730163, + "score": 0.7675828789334244, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.4063022828070774, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.7128058030788165, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.4063022828070774, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.7128058030788165, "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.47410002229034043, "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.38732841080078323, + "score": 0.7689532399280165, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.4441961115027302, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.7565542718609186, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.18117610753900412, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.5900497114648452, "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.30690336937373786, + "score": 0.5465526716276092, "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.46380735910652676, + "score": 0.8012679276648627, "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.19464521962073492, + "score": 0.6917901740466924, "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.38584042605633057, + "score": 0.8479928839177578, "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0, + "score": 0.6917901740466924, "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.0, + "score": 0.8479928839177578, "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.35548377438423956, + "score": 0.5461499540157965, "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5150536106864393, + "score": 0.7954823723658209, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.44543578807748957, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.7513336773729535, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.37192780061868996, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.647030498607364, "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0.0, "sentence_nr": 5 @@ -56310,735 +66259,1087 @@ { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.35098096867859657, + "score": 0.24011079455637607, "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.19920494035049138, "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.38561859819475125, + "score": 0.614209720001149, "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 1.0, + "score": 0.5896613549548209, "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 1.0, + "score": 0.7528914749586836, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.8958039312312598, + "score": 0.4596980088392874, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.9382091007325469, + "score": 0.713787745993602, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.6433799261824519, + "score": 0.5300714512917181, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.81037697367602, + "score": 0.7461630750708693, "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5403356450597102, + "score": 0.4596980088392874, "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7639130574395125, + "score": 0.713787745993602, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.4596980088392874, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.713787745993602, "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.833078701050083, + "score": 0.43007078231141604, "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.9482515348146272, + "score": 0.6680243275130087, "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.833078701050083, + "score": 0.4596980088392874, "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.9482515348146272, + "score": 0.713787745993602, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.4596980088392874, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.713787745993602, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.4596980088392874, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.713787745993602, "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5815699184831468, + "score": 0.33359103227594633, "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.8158797976578578, + "score": 0.701102363286568, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.2565271158193808, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5754598856447696, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.2615311775021803, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.6423472232689572, "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 1.0, + "score": 0.5896613549548209, "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 1.0, + "score": 0.7528914749586836, "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.833078701050083, + "score": 0.4596980088392874, "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.9482515348146272, + "score": 0.713787745993602, "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0, + "score": 0.5271017464925504, "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.0, + "score": 0.7749613594649343, "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.8097013849965253, + "score": 0.4596980088392874, "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.8896806148658662, + "score": 0.713787745993602, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.4596980088392874, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.713787745993602, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.3014335251508215, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.6287268013420879, "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.6161420984415483, + "score": 0.4335364472118335, "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6945809713247855, + "score": 0.6878319610579101, "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.833078701050083, + "score": 0.480771131185851, "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.9482515348146272, + "score": 0.7032048786770096, "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4500531895417844, + "score": 0.4801289744823913, "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.43027065541050147, + "score": 0.6766690087429765, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4933292241270431, + "score": 0.3272712268138726, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5225247297523148, + "score": 0.6272846474183881, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.23487811400114963, + "score": 0.3272712268138726, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4062284746604391, + "score": 0.6272846474183881, "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3763743474188506, + "score": 0.30421485886156485, "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4017565065239436, + "score": 0.566236392445952, "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4933292241270431, + "score": 0.32078739729528816, "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.511824430191619, + "score": 0.5817366082116868, "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5805399561362194, + "score": 0.32965129549221617, "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4810464260105228, + "score": 0.623436907204599, "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.21510618470971102, + "score": 0.0, "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.41380245501613677, + "score": 0.24706467963183681, "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.49539605131242165, + "score": 0.18623343474790552, "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.44728880966754114, + "score": 0.5383789355001968, "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3489214645008508, + "score": 0.18623343474790552, "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4519496200669607, + "score": 0.5383789355001968, "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.15415064977510756, + "score": 0.32965129549221617, "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.48192435154139673, + "score": 0.623436907204599, "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-saba", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.49539605131242165, + "score": 0.32078739729528816, "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-saba", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.44728880966754114, + "score": 0.5817366082116868, "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "mistralai/mistral-nemo", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0.0, "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "mistralai/mistral-nemo", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4825434542324755, + "score": 0.476501312022118, "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5617848264135781, + "score": 0.32078739729528816, "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5570886750436929, + "score": 0.5817366082116868, "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", "score": 0.0, "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.5212982931053122, + "score": 0.4919440349584284, "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.35319015092357736, + "score": 0.32078739729528816, "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.5822934956325967, + "score": 0.5817366082116868, "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.35319015092357736, + "score": 0.3231203125477008, "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.5822934956325967, + "score": 0.5812275690118908, "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.3231203125477008, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.5812275690118908, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", "score": 0.0, "sentence_nr": 5 }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.48726470075632883, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.44332438338421004, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.3231203125477008, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.5812275690118908, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.28489318277723963, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5764325110247531, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.2981792160679168, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5788026000794341, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.3942058093215873, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5878575558111695, + "sentence_nr": 5 + }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", + "metric": "bleu", + "score": 0.2981792160679168, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.5095895501997145, + "score": 0.5788026000794341, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.3485799122645514, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.6090575371936678, "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.49961369350177665, + "score": 0.3665134361137304, "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6351045115684573, + "score": 0.6118771029352303, "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5064127215831256, + "score": 0.3665134361137304, "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6516332048338376, + "score": 0.6118771029352303, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.3556254905627724, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.611063921488713, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.3556254905627724, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.611063921488713, "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.35319015092357736, + "score": 0.3485799122645514, "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5822934956325967, + "score": 0.6090575371936678, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.35870004213153, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5554477617576271, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.25590356077469273, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.527590401388296, "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.49885432872486163, + "score": 0.4085639059221913, "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6436097366017006, + "score": 0.5887623870312143, "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5064127215831256, + "score": 0.3485799122645514, "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6516332048338376, + "score": 0.6090575371936678, "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0, + "score": 0.3485799122645514, "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.0, + "score": 0.6090575371936678, "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.4100134571476398, "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5212982931053122, + "score": 0.5856608401367807, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.37392149096896676, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5833802647068038, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.24987807848117904, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5493430788000264, "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.17098323692758396, "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4647137781420131, + "score": 0.5216877937894046, "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5064127215831256, + "score": 0.3527295712700594, "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6516332048338376, + "score": 0.6062826429226292, "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3855522725905196, + "score": 0.6471892368478446, "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.587260566914102, + "score": 0.8142499721936278, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4426623526629488, + "score": 0.7012294787544179, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6368371029698285, + "score": 0.8478115719875968, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3666340989897011, + "score": 0.6917901740466924, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5782960278998768, + "score": 0.8479928839177578, "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.32892676518285585, + "score": 0.40202477345336673, "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5800761309604682, + "score": 0.7469480084357536, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.5091224918749461, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.7829685247145245, "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.35138749399652214, + "score": 0.42612283570374254, "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5704306601285767, + "score": 0.7185121839177114, "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0.0, "sentence_nr": 5 @@ -57046,511 +67347,831 @@ { "model": "openai/gpt-4o-mini", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4592978565863154, + "score": 0.2799331151961311, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.20781449703060773, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.25958657290343434, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.5698163692393635, "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.29456425448249246, + "score": 0.40157733283424196, "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5691358329649412, + "score": 0.7133166401137868, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.4625957988586645, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.7341375356694393, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.2915369229944523, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.562240894166207, "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.46550355389682435, + "score": 0.6917901740466924, "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.646730632916617, + "score": 0.8479928839177578, "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.15083364266523736, + "score": 0.0, "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4714472446464193, + "score": 0.290046956468694, "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0, + "score": 0.4625957988586645, "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.0, + "score": 0.7494665344743727, "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.25376192011637994, + "score": 0.6917901740466924, "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.47199515498282607, + "score": 0.8479928839177578, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.6917901740466924, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.8479928839177578, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.2615311775021803, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.5490659903839784, "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.17729842264695017, "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.26349889713915725, + "score": 0.5199388279318895, "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.12514328743841557, + "score": 0.23141570376732995, "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.34961836061490087, + "score": 0.5938624587877649, "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.19650854773882592, + "score": 0.23713320246552005, "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5134302167765095, + "score": 0.6106842970161642, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.21690365808279138, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.518761522736185, + "score": 0.5384773678665918, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.23114663823833642, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5205634208063233, + "score": 0.5786592584609213, "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.22128776529156546, "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.518761522736185, + "score": 0.5609439249510223, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.22128776529156546, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.572263303016663, "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.21690365808279138, "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.518761522736185, + "score": 0.5502221839528678, "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.23114663823833642, "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.518761522736185, + "score": 0.5786592584609213, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.2371332024655201, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.555256623366109, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.2371332024655201, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.555256623366109, "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.1998573974138024, "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4397936463531347, + "score": 0.540043957078071, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.22656720908801994, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.553314787782705, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.233078149078302, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.5966212933325205, "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.39814417587130846, + "score": 0.3282518529729176, "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6510906999464993, + "score": 0.6453010665294326, "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.19650854773882592, + "score": 0.0, "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5134302167765095, + "score": 0.2124551208803614, "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.30752616970214336, "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.507227991110909, + "score": 0.6051452460471443, "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.1739898487873076, + "score": 0.310441435588881, "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5216901258730671, + "score": 0.6413164971104282, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.310441435588881, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.6368374223560669, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.2080315522738391, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.5483451429916664, "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.23114663823833642, "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.48994178177127756, + "score": 0.5814841210741494, "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.22656720908801994, "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4916235564562672, + "score": 0.5465750236858569, "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5106109398471469, + "score": 0.6888365053466561, "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7688046995197549, + "score": 0.8656273480576243, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.44778459441351737, + "score": 0.25711386542134795, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7043336945393497, + "score": 0.6088853751738869, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.32214112487007024, + "score": 0.25711386542134795, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7794716829174484, + "score": 0.6088853751738869, "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.3416581331218724, "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5564992960428438, + "score": 0.6578570934289981, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.3060368950930089, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.6736142284622013, "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.44778459441351737, + "score": 0.24456656109396324, "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7043336945393497, + "score": 0.629934465484704, "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5129586382458503, + "score": 0.3060368950930089, "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7857394056399366, + "score": 0.6736142284622013, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.24456656109396324, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.629934465484704, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.24456656109396324, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.629934465484704, "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.48222455960294414, + "score": 0.3423591961656694, "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7511716303980656, + "score": 0.6570214418399444, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.47986315176610383, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.335386511803452, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.6803949682739066, "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5106109398471469, + "score": 0.6888365053466561, "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7688046995197549, + "score": 0.8656273480576243, "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3494188591554153, + "score": 0.23198210427894825, "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7072510223788713, + "score": 0.6195872175593368, "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0, + "score": 0.24456656109396324, "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.0, + "score": 0.629934465484704, "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4544489097785626, + "score": 0.3060368950930089, "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.8155088724539601, + "score": 0.6736142284622013, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.3423591961656694, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.6570214418399444, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.2281399713503153, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.6211104268881504, "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0.0, "sentence_nr": 5 @@ -57558,13135 +68179,220191 @@ { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.39693478727333953, + "score": 0.46965980060137014, "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4824766987096576, + "score": 0.24456656109396324, "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7665355959167616, + "score": 0.629934465484704, "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.6158161554766717, + "score": 0.5069487414732323, "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.8568982835533138, + "score": 0.7801245319017357, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.6535194995338728, + "score": 0.5695988432761473, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.8909391457425937, + "score": 0.7516103467926585, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.6535194995338728, + "score": 0.6358974376699329, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.8909391457425937, + "score": 0.736661937085844, "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5088645484558708, + "score": 0.5695988432761473, "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.8116199676115453, + "score": 0.7516103467926585, "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.6535194995338728, + "score": 0.8522456714074852, "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.8909391457425937, + "score": 0.9096914044088521, "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.6535194995338728, + "score": 0.45307778036928104, "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.8909391457425937, + "score": 0.6935397252637394, "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5088645484558708, + "score": 0.45307778036928104, "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.8116199676115453, + "score": 0.6935397252637394, "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5088645484558708, + "score": 0.5695988432761473, "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.8116199676115453, + "score": 0.7516103467926585, "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4062749424452353, + "score": 0.5695988432761473, "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7717578180410056, + "score": 0.7516103467926585, "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.35116777059394766, + "score": 0.5695988432761473, "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7265247193057359, + "score": 0.7516103467926585, "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-saba", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5088645484558708, + "score": 0.4719458927872361, "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-saba", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.8116199676115453, + "score": 0.6691100411531956, "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "mistralai/mistral-nemo", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4373266725468241, + "score": 0.43385612637937937, "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "mistralai/mistral-nemo", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7265900332348232, + "score": 0.6652156431356278, "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.8492326635760689, "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6810920170253699, + "score": 0.9027320255916917, "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.8151678595510182, + "score": 0.5695988432761473, "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.8873630455888943, + "score": 0.7516103467926585, "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.8571061116877262, + "score": 0.8522456714074852, "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.906026511295714, + "score": 0.9096914044088521, "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_from", "metric": "bleu", - "score": 0.8151678595510182, + "score": 0.8492326635760689, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.9027320255916917, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.8492326635760689, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.9027320255916917, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.4085639059221913, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.6786065713232268, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.30614023358320086, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.5870676308171808, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.2281399713503153, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.6211104268881504, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.7196315267102845, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.8835331636515565, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.5072784644062104, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.7361065921505279, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", + "metric": "bleu", + "score": 0.5072784644062104, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_from", "metric": "chrf", - "score": 0.8873630455888943, + "score": 0.7361065921505279, "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5386933265263314, + "score": 0.5072784644062104, "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6707532211471023, + "score": 0.7361065921505279, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.5072784644062104, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.7361065921505279, "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.7298378378464025, + "score": 0.5072784644062104, "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.8542310686470678, + "score": 0.7361065921505279, "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5852187596735429, + "score": 0.5072784644062104, "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7147018027438421, + "score": 0.7361065921505279, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.6004981752197522, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.7667541011433795, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.6004981752197522, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.7667541011433795, "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5852187596735429, + "score": 0.5072784644062104, "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7147018027438421, + "score": 0.7361065921505279, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.7196315267102845, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.8835331636515565, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.1815358071299676, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.5527491412738096, "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.7483293841345244, + "score": 0.7196315267102845, "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.8146526693270999, + "score": 0.8835331636515565, "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.7483293841345244, + "score": 0.6289868866690355, "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.8146526693270999, + "score": 0.7818060345351223, "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5124776602965491, + "score": 0.7196315267102845, "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6385609025659063, + "score": 0.8835331636515565, "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.7196315267102845, "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.11392322187442314, + "score": 0.8835331636515565, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.5072784644062104, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.7361065921505279, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.6026286934891149, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.8385943306861641, "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.480771131185851, "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.39882161697649804, + "score": 0.705252762035012, "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5852187596735429, + "score": 0.445107576642247, "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7147018027438421, + "score": 0.6955301378913092, "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.31684822717918226, + "score": 0.40157733283424196, "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4984008175596484, + "score": 0.6532350818978572, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.31684822717918226, + "score": 0.38091370416670794, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.41779931059703573, + "score": 0.6438225861756911, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.31684822717918226, + "score": 0.31374450602681464, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4715336632468998, + "score": 0.6422405832556486, "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.4924584878270648, "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.01834337391695103, + "score": 0.7062510642584722, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.3132768114661938, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.6093422256500997, "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.1540231640374204, + "score": 0.1624355752882384, "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.36182698586213136, + "score": 0.5090396683756193, "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3682241310101735, + "score": 0.30752616970214336, "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5606184355158915, + "score": 0.5976254557718147, "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2765950320972588, + "score": 0.15083364266523736, "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4432072463778114, + "score": 0.5008630255601011, "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3682241310101735, + "score": 0.15083364266523736, "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5450474312451057, + "score": 0.5008630255601011, "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.20248027846537173, + "score": 0.36227557436010244, "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.435318130545113, + "score": 0.6470050797908481, "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-saba", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.288122591812262, + "score": 0.4246163317880344, "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-saba", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.43296860754666744, + "score": 0.6588756520218197, "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-nemo", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3682241310101735, + "score": 0.2927057121559396, "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-nemo", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5450474312451057, + "score": 0.5325023221897424, "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.17415784669090767, + "score": 0.45506803308128024, "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3586870164339305, + "score": 0.7117510256855165, "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3682241310101735, + "score": 0.44903323241491255, "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5606184355158915, + "score": 0.7473828748083323, "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.419468515826214, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.6664000694648706, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.4938015541936678, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.7820348786317745, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.32263864160302524, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.6368573348787729, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 + "score": 0.1712473044894657, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.3263040636562357, - "sentence_nr": 6 + "score": 0.5194487191479099, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "en", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.12858902882463452, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "en", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.35477908164501704, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.3942058093215873, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.6316031412228033, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.48181149445310956, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.7046532915279582, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.5091224918749461, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.7202697992734389, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.5091224918749461, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.7202697992734389, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.1943759862788499, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.49688103957939267, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.48181149445310956, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.7046532915279582, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.5465526716276092, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.8012679276648627, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.5465526716276092, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.8012679276648627, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.5561195823338172, - "sentence_nr": 6 + "score": 0.1771210219016553, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.5362935676066722, - "sentence_nr": 6 + "score": 0.4606490636515431, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.5803515898273521, - "sentence_nr": 6 + "score": 0.1771210219016553, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.5422220468910552, - "sentence_nr": 6 + "score": 0.4606490636515431, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.4204739940979302, - "sentence_nr": 6 + "score": 0.5461499540157965, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.5173824078732066, - "sentence_nr": 6 + "score": 0.798357133373606, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.4177866849157374, - "sentence_nr": 6 + "score": 0.48181149445310956, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.5243375045345786, - "sentence_nr": 6 + "score": 0.7046532915279582, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.328872375046221, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.39142733167976973, - "sentence_nr": 6 + "score": 0.4676203687925029, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.3450219162509876, - "sentence_nr": 6 + "score": 0.48181149445310956, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.3993348853061597, - "sentence_nr": 6 + "score": 0.7046532915279582, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.30978068501889056, - "sentence_nr": 6 + "score": 0.22848056414159593, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.28295274449167956, - "sentence_nr": 6 + "score": 0.5972946651297018, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.3567464687150701, - "sentence_nr": 6 + "score": 0.48181149445310956, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.3651755892066728, - "sentence_nr": 6 + "score": 0.7046532915279582, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.33498389276277546, - "sentence_nr": 6 + "score": 0.48181149445310956, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.3296536654279081, - "sentence_nr": 6 + "score": 0.7046532915279582, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.42670493571995677, - "sentence_nr": 6 + "score": 0.48181149445310956, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.4251985835808586, - "sentence_nr": 6 + "score": 0.7046532915279582, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.2368693821608258, - "sentence_nr": 6 + "score": 0.26660603480752404, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.320909989176825, - "sentence_nr": 6 + "score": 0.6007386267555782, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.3223833286593516, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.39475158383309167, - "sentence_nr": 6 + "score": 0.11970700565377682, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.46146548771819573, - "sentence_nr": 6 + "score": 0.23357697166633196, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.4019452398054806, - "sentence_nr": 6 + "score": 0.5582260842665357, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.33438299066966715, - "sentence_nr": 6 + "score": 0.3903594390682207, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.5409759573191787, - "sentence_nr": 6 + "score": 0.6662116837137958, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.37854068916316835, - "sentence_nr": 6 + "score": 0.6917901740466924, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.5743796566387722, - "sentence_nr": 6 + "score": 0.8479928839177578, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.37854068916316835, - "sentence_nr": 6 + "score": 0.4727805712999679, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.5768306472334509, - "sentence_nr": 6 + "score": 0.7717158158167359, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.32134504358579785, - "sentence_nr": 6 + "score": 0.36816017035411847, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.5076725973953424, - "sentence_nr": 6 + "score": 0.6630063658071765, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.37456535742007424, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.6126780725749296, + "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.29300728994404895, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.4620516485185446, - "sentence_nr": 6 + "score": 0.39520818104058075, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.2562150245540302, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.47046477830594896, - "sentence_nr": 6 + "score": 0.24363783193706642, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.27182849679730653, - "sentence_nr": 6 + "score": 0.36314748337164254, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.5497265770945076, - "sentence_nr": 6 + "score": 0.7016516453748962, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.4544505188404086, - "sentence_nr": 6 + "score": 0.36314748337164254, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.6033006987520517, - "sentence_nr": 6 + "score": 0.7016516453748962, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.3509258729305825, - "sentence_nr": 6 + "score": 0.4335364472118335, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.5379703355059909, - "sentence_nr": 6 + "score": 0.6966914157873363, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.19135220621724439, - "sentence_nr": 6 + "score": 0.6096687386093484, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.4724042181215377, - "sentence_nr": 6 + "score": 0.737928890860239, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.38936263771250235, - "sentence_nr": 6 + "score": 0.15774545980684188, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.5443518219250745, - "sentence_nr": 6 + "score": 0.510914938532951, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.28555753499459907, - "sentence_nr": 6 + "score": 0.39174440233850644, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.3416445560351976, - "sentence_nr": 6 + "score": 0.6762795187534849, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.4049402235047407, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.5871644977560334, - "sentence_nr": 6 + "score": 0.24363783193706642, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.45506803308128024, - "sentence_nr": 6 + "score": 0.4526810222444627, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.6477506541284608, - "sentence_nr": 6 + "score": 0.7303764654257315, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.592313615748771, - "sentence_nr": 6 + "score": 0.4393160369685383, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.7382416555842614, - "sentence_nr": 6 + "score": 0.7326708250282779, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.592313615748771, - "sentence_nr": 6 + "score": 0.43769127523507534, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.7568286018427376, - "sentence_nr": 6 + "score": 0.7001859044467704, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.5521710658453207, - "sentence_nr": 6 + "score": 0.29742953512013587, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.7317828775912516, - "sentence_nr": 6 + "score": 0.6260896463225846, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "es", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.5521710658453207, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "es", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.7317828775912516, - "sentence_nr": 6 + "score": 0.16449149670902838, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.5521710658453207, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.7317828775912516, - "sentence_nr": 6 + "score": 0.5337097549575721, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.5521710658453207, - "sentence_nr": 6 + "score": 0.4719458927872361, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.7317828775912516, - "sentence_nr": 6 + "score": 0.6863265729154345, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.5521710658453207, - "sentence_nr": 6 + "score": 0.5309354663044072, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.7317828775912516, - "sentence_nr": 6 + "score": 0.6990707992725005, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.4226108216696222, - "sentence_nr": 6 + "score": 0.43385612637937937, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.6331414171574684, - "sentence_nr": 6 + "score": 0.6552557413442657, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.5521710658453207, - "sentence_nr": 6 + "score": 0.4719458927872361, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.7317828775912516, - "sentence_nr": 6 + "score": 0.6863265729154345, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.4226108216696222, - "sentence_nr": 6 + "score": 0.637202263020122, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.6331414171574684, - "sentence_nr": 6 + "score": 0.7346216972763816, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.602867050301643, - "sentence_nr": 6 + "score": 0.2925712720836999, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.7367363357155757, - "sentence_nr": 6 + "score": 0.5502827901617849, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.5521710658453207, - "sentence_nr": 6 + "score": 0.6349495142258627, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.7317828775912516, - "sentence_nr": 6 + "score": 0.7749613594649343, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.4018202851356865, - "sentence_nr": 6 + "score": 0.6349495142258627, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.6003256951549871, - "sentence_nr": 6 + "score": 0.7749613594649343, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.580451128369423, - "sentence_nr": 6 + "score": 0.6349495142258627, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.7542976177437886, - "sentence_nr": 6 + "score": 0.7749613594649343, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.4018202851356865, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.6003256951549871, - "sentence_nr": 6 + "score": 0.37973023491174585, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 6 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.526357446896968, - "sentence_nr": 6 + "score": 0.3902414128366543, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.5582486914071635, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.6678493404097802, - "sentence_nr": 6 + "score": 0.26963095785890817, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.4018202851356865, - "sentence_nr": 6 + "score": 0.5695988432761473, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.6003256951549871, - "sentence_nr": 6 + "score": 0.7516103467926585, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.6666823117022298, - "sentence_nr": 6 + "score": 0.5695988432761473, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.7571125338649978, - "sentence_nr": 6 + "score": 0.7516103467926585, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.5406438522344627, - "sentence_nr": 6 + "score": 0.4598036015897535, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.6520694800788391, - "sentence_nr": 6 + "score": 0.6256401299595566, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.3864572432237816, - "sentence_nr": 6 + "score": 0.43385612637937937, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.5849342936087653, - "sentence_nr": 6 + "score": 0.6552557413442657, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.5406438522344627, - "sentence_nr": 6 + "score": 0.4811256400535275, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.6520694800788391, - "sentence_nr": 6 + "score": 0.6745199184860661, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.6977240390484037, - "sentence_nr": 6 + "score": 0.2503023845012661, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.818984467219358, - "sentence_nr": 6 + "score": 0.5179835315406534, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.7019499719108448, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.8450280883390384, - "sentence_nr": 6 + "score": 0.32084466348045076, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.7397087417978795, - "sentence_nr": 6 + "score": 0.22063120635885589, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.8865031414920428, - "sentence_nr": 6 + "score": 0.5852924591274146, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.37717457428685847, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.5554130492458337, - "sentence_nr": 6 + "score": 0.31008822704072875, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 + "score": 0.1673872929477023, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.31598923484911084, - "sentence_nr": 6 + "score": 0.4506667273103674, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.18953162992336403, - "sentence_nr": 6 + "score": 0.1673872929477023, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.45876745950873354, - "sentence_nr": 6 + "score": 0.4506667273103674, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.2383770504614087, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.466645869611307, - "sentence_nr": 6 + "score": 0.31008822704072875, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.31008822704072875, + "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.20422838465921236, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.3821382671218279, - "sentence_nr": 6 + "score": 0.31008822704072875, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.18207052811092134, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.4504432021668592, - "sentence_nr": 6 + "score": 0.31008822704072875, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.31178219518131567, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.31178219518131567, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.14291173574075158, - "sentence_nr": 6 + "score": 0.1673872929477023, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.45184360988354105, - "sentence_nr": 6 + "score": 0.3836374068673084, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.2936418375689259, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.26710323996759094, + "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.2599542517888651, - "sentence_nr": 6 + "score": 0.1673872929477023, + "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.49098929416640624, - "sentence_nr": 6 + "score": 0.4506667273103674, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 + "score": 0.1673872929477023, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.34419514726440925, - "sentence_nr": 6 + "score": 0.4506667273103674, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 6 + "score": 0.1673872929477023, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 6 + "score": 0.4506667273103674, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.2128497674847141, - "sentence_nr": 6 + "score": 0.1673872929477023, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.438591227628555, - "sentence_nr": 6 + "score": 0.4506667273103674, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.1673872929477023, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.4506667273103674, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3114749711831053, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.16170596160446446, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.27743662258385243, - "sentence_nr": 6 + "score": 0.31802371065401513, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.21081851067789198, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.4042801758173556, - "sentence_nr": 6 + "score": 0.31008822704072875, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.7629273292796576, - "sentence_nr": 6 + "score": 0.5465526716276092, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.8510385544954956, - "sentence_nr": 6 + "score": 0.8012679276648627, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.8725129388059689, - "sentence_nr": 6 + "score": 0.6917901740466924, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.9495292423959529, - "sentence_nr": 6 + "score": 0.8479928839177578, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.8725129388059689, - "sentence_nr": 6 + "score": 0.22894939325531252, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.90941532255964, - "sentence_nr": 6 + "score": 0.5747669845604989, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.8725129388059689, - "sentence_nr": 6 + "score": 0.27545321289806546, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.9495292423959529, - "sentence_nr": 6 + "score": 0.6280000881172884, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.22894939325531252, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.5883432091316184, + "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.166352496246992, + "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.5330423528994436, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.6425503166524515, - "sentence_nr": 6 + "score": 0.224188058954654, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.8078891929749037, - "sentence_nr": 6 + "score": 0.5978847447208526, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.6425503166524515, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.8078891929749037, - "sentence_nr": 6 + "score": 0.3500905496410207, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.7629273292796576, - "sentence_nr": 6 + "score": 0.1378592993183041, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.8510385544954956, - "sentence_nr": 6 + "score": 0.3954726082641455, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.6888074582865503, - "sentence_nr": 6 + "score": 0.2680165156355779, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.8055061207769505, - "sentence_nr": 6 + "score": 0.545567244447617, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.6888074582865503, - "sentence_nr": 6 + "score": 0.25376192011637994, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.8151715541788959, - "sentence_nr": 6 + "score": 0.6052149632449516, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.8725129388059689, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.9495292423959529, - "sentence_nr": 6 + "score": 0.3631421561362529, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.6978429290017016, - "sentence_nr": 6 + "score": 0.5465526716276092, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.7717858931341154, - "sentence_nr": 6 + "score": 0.8012679276648627, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.18824072812426187, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.49349163706233623, - "sentence_nr": 6 + "score": 0.3086172473271798, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.694445271037971, - "sentence_nr": 6 + "score": 0.6217822674304354, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.3037643089519314, - "sentence_nr": 6 + "score": 0.3086172473271798, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.5183662698462751, - "sentence_nr": 6 + "score": 0.6217822674304354, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.18376711147874328, - "sentence_nr": 6 + "score": 0.3086172473271798, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.3981272326046884, - "sentence_nr": 6 + "score": 0.6217822674304354, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.340960560695735, - "sentence_nr": 6 + "score": 0.3423591961656694, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.5217663812589132, - "sentence_nr": 6 + "score": 0.6803639512204375, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.18189587992135597, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.43964080400724653, - "sentence_nr": 6 + "score": 0.36539169772085134, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.2147607499133801, - "sentence_nr": 6 + "score": 0.3411488281065382, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.3976144917079093, - "sentence_nr": 6 + "score": 0.6740035136770584, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.3941175366175992, - "sentence_nr": 6 + "score": 0.7221847203387323, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.5506555496793699, - "sentence_nr": 6 + "score": 0.8931067231936596, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.6495308560002527, - "sentence_nr": 6 + "score": 0.3416581331218724, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.7114963534919011, - "sentence_nr": 6 + "score": 0.6578570934289981, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.3339087646492816, - "sentence_nr": 6 + "score": 0.8492326635760689, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.5995623358499859, - "sentence_nr": 6 + "score": 0.92923260511913, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 6 + "score": 0.23357697166633196, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 6 + "score": 0.6610479563844994, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.4122974402951816, - "sentence_nr": 6 + "score": 0.21270024173913482, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.6170911690364487, - "sentence_nr": 6 + "score": 0.6183813548597394, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 + "score": 0.2281399713503153, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.2643854378698732, - "sentence_nr": 6 + "score": 0.5664428061469187, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.4066689638009577, - "sentence_nr": 6 + "score": 0.19920494035049138, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.5267604642487788, - "sentence_nr": 6 + "score": 0.614209720001149, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.8253498772794055, - "sentence_nr": 6 + "score": 0.23357697166633196, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.8529564805429163, - "sentence_nr": 6 + "score": 0.6610479563844994, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.7944837206494969, - "sentence_nr": 6 + "score": 0.23357697166633196, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.8784531740275225, - "sentence_nr": 6 + "score": 0.6610479563844994, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.7944837206494969, - "sentence_nr": 6 + "score": 0.23374920560961487, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.8784531740275225, - "sentence_nr": 6 + "score": 0.6381858968225665, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.6941268297866866, - "sentence_nr": 6 + "score": 0.19468124777669177, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.7679844670813416, - "sentence_nr": 6 + "score": 0.5932036830661304, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.8253498772794055, - "sentence_nr": 6 + "score": 0.1990581597344524, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.8529564805429163, - "sentence_nr": 6 + "score": 0.5425407305974037, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.5401725898595141, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.7497446479932584, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.6211104268881504, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.7072172847953276, - "sentence_nr": 6 + "score": 0.220294066346937, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.7914639887327892, - "sentence_nr": 6 + "score": 0.6375628454216249, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.7072172847953276, - "sentence_nr": 6 + "score": 0.6026286934891149, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.7914639887327892, - "sentence_nr": 6 + "score": 0.8385943306861641, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.5642761727828352, - "sentence_nr": 6 + "score": 0.6026286934891149, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.7668993520558344, - "sentence_nr": 6 + "score": 0.8385943306861641, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.22012248449054683, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.647344487348367, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.175538121835486, - "sentence_nr": 6 + "score": 0.13737279171076758, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.44197441533246407, - "sentence_nr": 6 + "score": 0.42785667387454995, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.17923344640485428, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.5211683330085515, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.5828833474188783, - "sentence_nr": 6 + "score": 0.5072784644062104, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.7908226509294533, - "sentence_nr": 6 + "score": 0.7342525133793019, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.38694317759010316, - "sentence_nr": 6 + "score": 0.4797543511401896, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.5953878513137957, - "sentence_nr": 6 + "score": 0.7240781310560407, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.2620499195763038, - "sentence_nr": 6 + "score": 0.5072784644062104, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.48937240022909234, - "sentence_nr": 6 + "score": 0.7342525133793019, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.22128776529156546, - "sentence_nr": 6 + "score": 0.5072784644062104, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.4999323991212311, - "sentence_nr": 6 + "score": 0.7342525133793019, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.5420890779002704, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.7219273458493682, + "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.26332019392396333, - "sentence_nr": 6 + "score": 0.28943182557959846, + "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.5893220054460814, - "sentence_nr": 6 + "score": 0.5795851744687439, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.23887527917609022, - "sentence_nr": 6 + "score": 0.6004981752197522, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.5924993690004501, - "sentence_nr": 6 + "score": 0.7644556249154987, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.28943182557959846, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5795851744687439, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.28943182557959846, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5795851744687439, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.2745762486209681, - "sentence_nr": 6 + "score": 0.2677353447271197, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.5730023382770898, - "sentence_nr": 6 + "score": 0.569529411820844, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.5420890779002704, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.7219273458493682, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.15821285888349262, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4886015917518962, + "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.4161791450287817, - "sentence_nr": 6 + "score": 0.5072784644062104, + "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.6046852394200818, - "sentence_nr": 6 + "score": 0.7342525133793019, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.3070898761263382, - "sentence_nr": 6 + "score": 0.17793925745339384, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.5791648909423264, - "sentence_nr": 6 + "score": 0.5497069221194746, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 6 + "score": 0.5465526716276092, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 6 + "score": 0.8012679276648627, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.31600229153053044, - "sentence_nr": 6 + "score": 0.5679161104357995, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.5374439094267343, - "sentence_nr": 6 + "score": 0.7564733289707379, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.5679161104357995, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.7564733289707379, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.1712473044894657, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5194487191479099, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 6 + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.27733310601709266, - "sentence_nr": 6 + "score": 0.2567770437062668, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.4703077247331959, - "sentence_nr": 6 + "score": 0.38457089506267517, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.7203673717155472, - "sentence_nr": 6 + "score": 0.6582292681072595, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.5040673596100225, - "sentence_nr": 6 + "score": 0.5465526716276092, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.6469962279041276, - "sentence_nr": 6 + "score": 0.8012679276648627, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.22816849039973935, - "sentence_nr": 6 + "score": 0.6401876410870359, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.49849908693271183, - "sentence_nr": 6 + "score": 0.7526484951226097, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.5106109398471469, - "sentence_nr": 6 + "score": 0.5184341074271375, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.6635467152827634, - "sentence_nr": 6 + "score": 0.7295047041623038, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.325909498033977, - "sentence_nr": 6 + "score": 0.4252502464011162, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.5501364764829885, - "sentence_nr": 6 + "score": 0.6774296788457803, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.4252502464011162, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.6774296788457803, + "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.4369942407063455, - "sentence_nr": 6 + "score": 0.18543829210530705, + "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.6009573115008555, - "sentence_nr": 6 + "score": 0.52927058814847, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.3009687072297843, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.5341810386314462, - "sentence_nr": 6 + "score": 0.14107526427034148, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.3140382293917749, - "sentence_nr": 6 + "score": 0.18623343474790552, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.5631437828635808, - "sentence_nr": 6 + "score": 0.5348516130206653, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.7498810286408993, - "sentence_nr": 6 + "score": 0.18623343474790552, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.7726337964681356, - "sentence_nr": 6 + "score": 0.5348516130206653, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.7498810286408993, - "sentence_nr": 6 + "score": 0.42643704825557327, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.7726337964681356, - "sentence_nr": 6 + "score": 0.6730449758221991, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.22816849039973935, - "sentence_nr": 6 + "score": 0.4252502464011162, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.49849908693271183, - "sentence_nr": 6 + "score": 0.6774296788457803, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 + "score": 0.185715135067742, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.4167743222652789, - "sentence_nr": 6 + "score": 0.5098093454005965, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.4806216298219478, - "sentence_nr": 6 + "score": 0.42643704825557327, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.6311658995293531, - "sentence_nr": 6 + "score": 0.6730449758221991, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.7186969683828063, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.8143071707828088, - "sentence_nr": 6 + "score": 0.12497004250886251, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 + "score": 0.4252502464011162, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.395494817172382, - "sentence_nr": 6 + "score": 0.6803639512204375, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 + "score": 0.28648682864686603, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.48210216762305635, - "sentence_nr": 6 + "score": 0.543546241720005, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 + "score": 0.4252502464011162, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.5029543425204815, - "sentence_nr": 6 + "score": 0.6774296788457803, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.22543108408457457, - "sentence_nr": 6 + "score": 0.18543829210530705, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.5721164465661742, - "sentence_nr": 6 + "score": 0.5283005433854684, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.15495096883986592, - "sentence_nr": 6 + "score": 0.20313747122261766, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.45421263258392414, - "sentence_nr": 6 + "score": 0.5392632080295834, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 + "score": 0.18623343474790552, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.5138104164912963, - "sentence_nr": 6 + "score": 0.5348516130206653, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 + "score": 0.33057129676705455, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.42099734580654347, - "sentence_nr": 6 + "score": 0.5669225664686625, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.24518299917312847, - "sentence_nr": 6 + "score": 0.33057129676705455, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.5623282316325473, - "sentence_nr": 6 + "score": 0.5669225664686625, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 + "score": 0.33057129676705455, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.4773779562574767, - "sentence_nr": 6 + "score": 0.5669225664686625, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 6 + "score": 0.3240220869485148, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 6 + "score": 0.5364140651922888, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.22119423000583918, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.5855963149167847, - "sentence_nr": 6 + "score": 0.4301823405286034, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 6 + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.2772639581765057, - "sentence_nr": 6 + "score": 0.4301823405286034, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.5194247346787363, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.6585810035136251, - "sentence_nr": 6 + "score": 0.24914989711092594, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.16111212240349498, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.5162765195160328, - "sentence_nr": 6 + "score": 0.4320304661292458, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.23109536367862135, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.5693079918450474, - "sentence_nr": 6 + "score": 0.4320304661292458, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 6 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.5052082359105701, - "sentence_nr": 6 + "score": 0.4301823405286034, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.17991078645928837, - "sentence_nr": 6 + "score": 0.3240220869485148, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.5159811845433955, - "sentence_nr": 6 + "score": 0.5364140651922888, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "id", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.35504200505176187, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "id", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.5693079918450474, - "sentence_nr": 6 + "score": 0.41768335112041305, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.1794560313432444, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.5236301264596329, - "sentence_nr": 6 + "score": 0.41786513699087335, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 6 + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.45963072970927465, - "sentence_nr": 6 + "score": 0.4301823405286034, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.1794560313432444, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.5236301264596329, - "sentence_nr": 6 + "score": 0.4084622939366714, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 + "score": 0.33032772118856274, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.45963072970927465, - "sentence_nr": 6 + "score": 0.5637799127470854, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 + "score": 0.33032772118856274, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.4506310431662278, - "sentence_nr": 6 + "score": 0.5637799127470854, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.35504200505176187, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.5693079918450474, - "sentence_nr": 6 + "score": 0.4301823405286034, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 + "score": 0.33057129676705455, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.4492950042617377, - "sentence_nr": 6 + "score": 0.5669225664686625, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 6 + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.4608738248525917, - "sentence_nr": 6 + "score": 0.4301823405286034, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.4252891537802403, - "sentence_nr": 6 + "score": 0.5468017145144113, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.6269243845872724, - "sentence_nr": 6 + "score": 0.7519227909172003, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.6486932415130529, - "sentence_nr": 6 + "score": 0.6004981752197522, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.788686710424071, - "sentence_nr": 6 + "score": 0.7697646564917222, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.49831162551286645, - "sentence_nr": 6 + "score": 0.6004981752197522, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.6906494695103921, - "sentence_nr": 6 + "score": 0.7697646564917222, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.6486932415130529, - "sentence_nr": 6 + "score": 0.6004981752197522, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.7887116805325072, - "sentence_nr": 6 + "score": 0.7697646564917222, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.6004981752197522, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.7697646564917222, + "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.6325987025085013, - "sentence_nr": 6 + "score": 0.4790714250659131, + "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.7966336957924106, - "sentence_nr": 6 + "score": 0.6938678729026243, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.6486932415130529, - "sentence_nr": 6 + "score": 0.6004981752197522, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.788686710424071, - "sentence_nr": 6 + "score": 0.7697646564917222, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.5694299147290928, - "sentence_nr": 6 + "score": 0.480771131185851, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.7161974280320248, - "sentence_nr": 6 + "score": 0.7084427046662088, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.6486932415130529, - "sentence_nr": 6 + "score": 0.480771131185851, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.788686710424071, - "sentence_nr": 6 + "score": 0.7084427046662088, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.49831162551286645, - "sentence_nr": 6 + "score": 0.5420890779002704, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.6906247423308508, - "sentence_nr": 6 + "score": 0.7268331815757023, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.2819665911730608, - "sentence_nr": 6 + "score": 0.5420890779002704, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.5919372748765395, - "sentence_nr": 6 + "score": 0.7268331815757023, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.6486932415130529, - "sentence_nr": 6 + "score": 0.4186091892833126, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.788686710424071, - "sentence_nr": 6 + "score": 0.6654623051601114, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.2624310277292268, - "sentence_nr": 6 + "score": 0.6004981752197522, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.6025429011085721, - "sentence_nr": 6 + "score": 0.7697646564917222, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.6486932415130529, - "sentence_nr": 6 + "score": 0.6004981752197522, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.7887116805325072, - "sentence_nr": 6 + "score": 0.7697646564917222, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.7490853969372642, - "sentence_nr": 6 + "score": 0.6004981752197522, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.7869453805471358, - "sentence_nr": 6 + "score": 0.7697646564917222, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.4970449067437269, - "sentence_nr": 6 + "score": 0.6004981752197522, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.5549084692917513, - "sentence_nr": 6 + "score": 0.7697646564917222, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.47136688868251947, - "sentence_nr": 6 + "score": 0.6004981752197522, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.532838700147956, - "sentence_nr": 6 + "score": 0.7697646564917222, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 + "score": 0.4186091892833126, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 6 + "score": 0.6654623051601114, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.39057277619092257, - "sentence_nr": 6 + "score": 0.4790714250659131, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.3934016321632531, - "sentence_nr": 6 + "score": 0.7010793195917541, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.43937095446369234, - "sentence_nr": 6 + "score": 0.5420890779002704, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.46181721677136944, - "sentence_nr": 6 + "score": 0.7268331815757023, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.3246935344198473, - "sentence_nr": 6 + "score": 0.617939643800199, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.4118595729651108, - "sentence_nr": 6 + "score": 0.8356543644789964, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.7490853969372642, - "sentence_nr": 6 + "score": 0.8492326635760689, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.8062091543413888, - "sentence_nr": 6 + "score": 0.9027320255916917, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.4462203715133425, - "sentence_nr": 6 + "score": 0.617939643800199, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.5604772871598175, - "sentence_nr": 6 + "score": 0.8356543644789964, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 6 + "score": 0.8492326635760689, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 6 + "score": 0.9027320255916917, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.7490853969372642, - "sentence_nr": 6 + "score": 0.8492326635760689, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.8062091543413888, - "sentence_nr": 6 + "score": 0.9027320255916917, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.5054091115759235, - "sentence_nr": 6 + "score": 0.6526460174517784, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.6683122485502007, - "sentence_nr": 6 + "score": 0.8267638445308375, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.7378351342269067, - "sentence_nr": 6 + "score": 0.8522456714074852, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.7822638455166255, - "sentence_nr": 6 + "score": 0.9096914044088521, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.49023502313124495, - "sentence_nr": 7 + "score": 0.8522456714074852, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.7638414724136195, - "sentence_nr": 7 + "score": 0.9096914044088521, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.4424906782646928, - "sentence_nr": 7 + "score": 0.8522456714074852, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.705507971295129, - "sentence_nr": 7 + "score": 0.9096914044088521, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.41452787844405115, - "sentence_nr": 7 + "score": 0.6912804407652906, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.6973605663974715, - "sentence_nr": 7 + "score": 0.8416888527493164, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.41032302768839235, - "sentence_nr": 7 + "score": 0.6912804407652906, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.6634154486532953, - "sentence_nr": 7 + "score": 0.8416888527493164, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.343041631179768, - "sentence_nr": 7 + "score": 0.3025029865727436, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.6383283286161612, - "sentence_nr": 7 + "score": 0.6057244918018514, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.44711013370113256, - "sentence_nr": 7 + "score": 0.8492326635760689, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.7319347493436125, - "sentence_nr": 7 + "score": 0.9027320255916917, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.4275810014748856, - "sentence_nr": 7 + "score": 0.8522456714074852, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.6481070648129139, - "sentence_nr": 7 + "score": 0.9096914044088521, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.5702655877666989, - "sentence_nr": 7 + "score": 0.8522456714074852, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.8232854345902009, - "sentence_nr": 7 + "score": 0.9096914044088521, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.46409619603227925, - "sentence_nr": 7 + "score": 0.8492326635760689, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.7474126325188408, - "sentence_nr": 7 + "score": 0.9027320255916917, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.0422060018445322, - "sentence_nr": 7 + "score": 0.8492326635760689, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.27278456488226854, - "sentence_nr": 7 + "score": 0.9027320255916917, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.42803425515420807, - "sentence_nr": 7 + "score": 0.2704091953828695, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.7260183442795153, - "sentence_nr": 7 + "score": 0.6207272323003366, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.23962966980870534, - "sentence_nr": 7 + "score": 0.3386854985606571, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.5138361143222901, - "sentence_nr": 7 + "score": 0.604413581883028, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "fil", + "task": "translation_from", "metric": "bleu", - "score": 0.3597862823053843, - "sentence_nr": 7 + "score": 0.47410002229034043, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "fil", + "task": "translation_from", "metric": "chrf", - "score": 0.7212767938301806, - "sentence_nr": 7 + "score": 0.7663313999772253, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.07860105393900486, - "sentence_nr": 7 + "score": 0.27720938018510377, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.09678377693633947, - "sentence_nr": 7 + "score": 0.428047180290638, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.11601141307045003, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.21671187566850864, - "sentence_nr": 7 + "score": 0.30350690419450826, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.2390076354901812, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.33570154125476054, - "sentence_nr": 7 + "score": 0.2813985981593422, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.1371661844308428, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.23455679137513727, - "sentence_nr": 7 + "score": 0.28107488868712643, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.2954873212263811, + "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 7 + "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.1775614884118737, - "sentence_nr": 7 + "score": 0.2974410142531065, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.21326369102393236, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.24781828193168487, - "sentence_nr": 7 + "score": 0.30042054271881197, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.30142704700265815, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.30142704700265815, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.12394460940540938, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.26662620996190534, - "sentence_nr": 7 + "score": 0.3334615788010355, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.3018990564467044, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.2688025405888032, + "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.30538115660133164, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.338864446519003, - "sentence_nr": 7 + "score": 0.29942074717273737, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.2982195617869878, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.14891504773093184, - "sentence_nr": 7 + "score": 0.3050638713235347, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.2618919111168516, - "sentence_nr": 7 + "score": 0.5082087402765254, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", + "score": 0.30142704700265815, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", "score": 0.0, - "sentence_nr": 7 + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.30142704700265815, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.1406879778177777, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.24227488458492952, - "sentence_nr": 7 + "score": 0.3444107902907301, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.04114212836378985, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.1070604518443882, - "sentence_nr": 7 + "score": 0.2773273497281852, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "bleu", - "score": 0.26411327741267115, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "arz", + "task": "translation_from", "metric": "chrf", - "score": 0.2898946819245943, - "sentence_nr": 7 + "score": 0.29942074717273737, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.35015224715252113, - "sentence_nr": 7 + "score": 0.6281881652405527, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.5701648579139658, - "sentence_nr": 7 + "score": 0.7361567090943679, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.3349252032650068, - "sentence_nr": 7 + "score": 0.569133886912883, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.5908087431574293, - "sentence_nr": 7 + "score": 0.6834516951654327, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.3258812297722265, - "sentence_nr": 7 + "score": 0.16807611261595506, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.5753985304712377, - "sentence_nr": 7 + "score": 0.4597054186181326, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.2883113322808919, - "sentence_nr": 7 + "score": 0.3343063479794574, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.5835478395499368, - "sentence_nr": 7 + "score": 0.5429171669983389, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.4504780990115136, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.6386322492678208, + "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.11690741296020518, - "sentence_nr": 7 + "score": 0.21555378801920327, + "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.44984705715342654, - "sentence_nr": 7 + "score": 0.48837390458060403, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.2577716972449781, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.5171901208397282, - "sentence_nr": 7 + "score": 0.22847893469128855, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.04631732527976412, - "sentence_nr": 7 + "score": 0.1512514932863718, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.21558480215297515, - "sentence_nr": 7 + "score": 0.3515865992727904, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.25639784746935274, - "sentence_nr": 7 + "score": 0.1512514932863718, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.5733913952323451, - "sentence_nr": 7 + "score": 0.3515865992727904, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.23287896954139942, - "sentence_nr": 7 + "score": 0.2915369229944523, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.5168980964497457, - "sentence_nr": 7 + "score": 0.535395621261131, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 0.40910310335214356, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "score": 0.6380312874183272, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.24643585808835486, - "sentence_nr": 7 + "score": 0.5650247638590733, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.5459613462641708, - "sentence_nr": 7 + "score": 0.7341016750688163, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 + "score": 0.4504780990115136, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.004597701149425286, - "sentence_nr": 7 + "score": 0.6386322492678208, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.23325505861671614, - "sentence_nr": 7 + "score": 0.1813423031516851, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.503948422566616, - "sentence_nr": 7 + "score": 0.4592771215097494, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.3449058130015412, - "sentence_nr": 7 + "score": 0.4504780990115136, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.5365619830343804, - "sentence_nr": 7 + "score": 0.6386322492678208, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.29688845677442144, - "sentence_nr": 7 + "score": 0.623652672746999, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.5494319015457763, - "sentence_nr": 7 + "score": 0.7064310568035931, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.3257602417321556, - "sentence_nr": 7 + "score": 0.40910310335214356, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.5666596539835803, - "sentence_nr": 7 + "score": 0.6380312874183272, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.22511140285349446, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.553839023223762, - "sentence_nr": 7 + "score": 0.33132739382030574, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "es", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.24479697566202357, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "es", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.5213023098886357, - "sentence_nr": 7 + "score": 0.21241088191397664, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation_from", "metric": "bleu", - "score": 0.3274016883618531, - "sentence_nr": 7 + "score": 0.21511238963872098, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation_from", "metric": "chrf", - "score": 0.5570399656004248, - "sentence_nr": 7 + "score": 0.48967538401421223, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.2211880505010663, - "sentence_nr": 7 + "score": 0.27249745234058675, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.4573855767208229, - "sentence_nr": 7 + "score": 0.576487806400357, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.45149688763848994, - "sentence_nr": 7 + "score": 0.32078739729528816, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.6166500596693574, - "sentence_nr": 7 + "score": 0.5779838399768712, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.399477857457097, - "sentence_nr": 7 + "score": 0.32965129549221617, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.5617218895807364, - "sentence_nr": 7 + "score": 0.5788023273137882, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 + "score": 0.1860962119549805, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.10125638619893, - "sentence_nr": 7 + "score": 0.5438504570088443, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.3191349966700777, - "sentence_nr": 7 + "score": 0.17923344640485428, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.5329750656706205, - "sentence_nr": 7 + "score": 0.5428745815211856, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.09175663647957763, - "sentence_nr": 7 + "score": 0.18543829210530705, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.32499940569388225, - "sentence_nr": 7 + "score": 0.5354482399943388, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.162496560019558, - "sentence_nr": 7 + "score": 0.18951629567590744, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.4637542439867255, - "sentence_nr": 7 + "score": 0.5515559648122452, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.25376032254696296, - "sentence_nr": 7 + "score": 0.1860962119549805, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.5334329403985332, - "sentence_nr": 7 + "score": 0.5438504570088443, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.20039141607873007, - "sentence_nr": 7 + "score": 0.1860962119549805, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.36123312088832493, - "sentence_nr": 7 + "score": 0.5438504570088443, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 + "score": 0.19032892442937785, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.0564437248458207, - "sentence_nr": 7 + "score": 0.5194565258434112, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.15026037463138217, - "sentence_nr": 7 + "score": 0.18437427949667837, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.4303467795130825, - "sentence_nr": 7 + "score": 0.5211412954589442, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.2529076741385625, - "sentence_nr": 7 + "score": 0.18107197870881736, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.5430833248313275, - "sentence_nr": 7 + "score": 0.533623377476928, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.207314191412716, - "sentence_nr": 7 + "score": 0.548958765126221, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.4360555836773355, - "sentence_nr": 7 + "score": 0.7425459638873632, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.08070632004040007, - "sentence_nr": 7 + "score": 0.3365047447281543, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.35911678207067443, - "sentence_nr": 7 + "score": 0.5791325287918098, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.4221671351559825, - "sentence_nr": 7 + "score": 0.18437427949667837, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.6302060108035411, - "sentence_nr": 7 + "score": 0.5211412954589442, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.4398690431123469, - "sentence_nr": 7 + "score": 0.3365047447281543, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.6046405925677363, - "sentence_nr": 7 + "score": 0.5791325287918098, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.30594422683254774, - "sentence_nr": 7 + "score": 0.3365047447281543, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.5439400651386468, - "sentence_nr": 7 + "score": 0.5791325287918098, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.24968557018529272, - "sentence_nr": 7 + "score": 0.15071676257541072, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.5511430757077329, - "sentence_nr": 7 + "score": 0.5080729257689284, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 + "score": 0.14598608091257087, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.036093834539820895, - "sentence_nr": 7 + "score": 0.4458625802506543, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "th", + "task": "translation_from", "metric": "bleu", - "score": 0.1939545119098376, - "sentence_nr": 7 + "score": 0.1860962119549805, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "th", + "task": "translation_from", "metric": "chrf", - "score": 0.48320036215224016, - "sentence_nr": 7 + "score": 0.5438504570088443, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.32707695373369694, - "sentence_nr": 7 + "score": 0.4814564802258215, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.5166643606783462, - "sentence_nr": 7 + "score": 0.7954823723658209, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.3069937936246452, - "sentence_nr": 7 + "score": 0.3742893656007335, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.5024648105961349, - "sentence_nr": 7 + "score": 0.7582803042224814, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.3575909322256676, - "sentence_nr": 7 + "score": 0.6316839256114659, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.5409483829147745, - "sentence_nr": 7 + "score": 0.8143078359179658, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.2228729825024992, - "sentence_nr": 7 + "score": 0.5069487414732323, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.4466759653076362, - "sentence_nr": 7 + "score": 0.8112065454752675, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.48181149445310956, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.7675828789334244, + "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.28261688976967947, - "sentence_nr": 7 + "score": 0.22012248449054683, + "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.5075237416590358, - "sentence_nr": 7 + "score": 0.6156007392092506, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.3235473265529593, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.5441122251341168, - "sentence_nr": 7 + "score": 0.25811803218589047, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.2781578586520005, - "sentence_nr": 7 + "score": 0.31186879016424773, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.3796663901127053, - "sentence_nr": 7 + "score": 0.6488151565290091, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.3935019932504074, - "sentence_nr": 7 + "score": 0.31186879016424773, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.6071903247613194, - "sentence_nr": 7 + "score": 0.6488151565290091, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.3288143137394372, - "sentence_nr": 7 + "score": 0.44543578807748957, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.5596092732231619, - "sentence_nr": 7 + "score": 0.7492834759166062, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 0.19920494035049138, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "score": 0.614209720001149, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.4122335241726334, - "sentence_nr": 7 + "score": 0.3025029865727436, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.6323888082640657, - "sentence_nr": 7 + "score": 0.6286065041873574, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.12858902882463447, - "sentence_nr": 7 + "score": 0.48181149445310956, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.3148709023566568, - "sentence_nr": 7 + "score": 0.7675828789334244, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.17140863043800483, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.39681418211766745, - "sentence_nr": 7 + "score": 0.20031295840594252, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.3927237741677927, - "sentence_nr": 7 + "score": 0.5465526716276092, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.7451438087039315, - "sentence_nr": 7 + "score": 0.8012679276648627, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.5570357635362685, - "sentence_nr": 7 + "score": 0.5465526716276092, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.8116469942298856, - "sentence_nr": 7 + "score": 0.8012679276648627, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.3508597296865219, - "sentence_nr": 7 + "score": 0.5465526716276092, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.6833592152043626, - "sentence_nr": 7 + "score": 0.8012679276648627, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.21259470439331316, - "sentence_nr": 7 + "score": 0.17509131039045966, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.5863866793721222, - "sentence_nr": 7 + "score": 0.4886834175779622, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.38084051173962913, - "sentence_nr": 7 + "score": 0.15573964185427053, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.7334510090568515, - "sentence_nr": 7 + "score": 0.3372949202573946, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation_from", "metric": "bleu", - "score": 0.26513488970168847, - "sentence_nr": 7 + "score": 0.22894939325531252, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation_from", "metric": "chrf", - "score": 0.6029932145447834, - "sentence_nr": 7 + "score": 0.6048598347770396, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.4432782054917686, - "sentence_nr": 7 + "score": 0.602867050301643, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.7181569025811343, - "sentence_nr": 7 + "score": 0.8176176657543648, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.6263164471220594, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.852013904460107, - "sentence_nr": 7 + "score": 0.5386695403411698, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.5112867162620864, - "sentence_nr": 7 + "score": 0.378882732439682, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.7497537018148864, - "sentence_nr": 7 + "score": 0.6841096204411963, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.32547291366749675, - "sentence_nr": 7 + "score": 0.47410002229034043, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.6738469931497133, - "sentence_nr": 7 + "score": 0.7538467008030766, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.4667782254569818, - "sentence_nr": 7 + "score": 0.631218480549812, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.7754094279644977, - "sentence_nr": 7 + "score": 0.7874289440091755, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.10177931989613292, - "sentence_nr": 7 + "score": 0.19865054013223624, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.3335479382455017, - "sentence_nr": 7 + "score": 0.6164064592365338, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.3459167762620119, - "sentence_nr": 7 + "score": 0.47410002229034043, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.7155724078484401, - "sentence_nr": 7 + "score": 0.7538467008030766, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.3826576187198625, - "sentence_nr": 7 + "score": 0.12858902882463452, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.6071841372061269, - "sentence_nr": 7 + "score": 0.44304178293591645, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.3447241447679157, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.5531085140985558, - "sentence_nr": 7 + "score": 0.3283378949269553, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.3178743908080705, - "sentence_nr": 7 + "score": 0.47410002229034043, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.5513949312034092, - "sentence_nr": 7 + "score": 0.7538467008030766, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.1883251048230039, - "sentence_nr": 7 + "score": 0.47410002229034043, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.45303225382772006, - "sentence_nr": 7 + "score": 0.7538467008030766, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.2378706071654586, - "sentence_nr": 7 + "score": 0.1897992267368494, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.49630155585737695, - "sentence_nr": 7 + "score": 0.5849112100276023, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.3860973950960897, - "sentence_nr": 7 + "score": 0.5718109192406814, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.6271680934322363, - "sentence_nr": 7 + "score": 0.798422026035913, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.24967756802190116, - "sentence_nr": 7 + "score": 0.631218480549812, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.559682285505658, - "sentence_nr": 7 + "score": 0.8445422169928634, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.36440851219076265, - "sentence_nr": 7 + "score": 0.2111187176080899, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.6157376412237141, - "sentence_nr": 7 + "score": 0.6020583416224236, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.38048895490051765, - "sentence_nr": 7 + "score": 0.47410002229034043, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.6349497388372479, - "sentence_nr": 7 + "score": 0.6947687298202525, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 0.47410002229034043, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "score": 0.6947687298202525, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.4324371049196428, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.6305851137521162, - "sentence_nr": 7 + "score": 0.46992509483298506, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 7 + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.01937817581496422, - "sentence_nr": 7 + "score": 0.2534837513667069, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "ml", + "task": "translation_from", "metric": "bleu", - "score": 0.1805414152287055, - "sentence_nr": 7 + "score": 0.47410002229034043, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "ml", + "task": "translation_from", "metric": "chrf", - "score": 0.4894585255537274, - "sentence_nr": 7 + "score": 0.7538467008030766, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.31128635710849173, - "sentence_nr": 7 + "score": 0.47410002229034043, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.6304411194127884, - "sentence_nr": 7 + "score": 0.671938683171001, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.13308561809919006, - "sentence_nr": 7 + "score": 0.5206571060403834, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.5312476702183977, - "sentence_nr": 7 + "score": 0.689324258927, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.2579124920342433, - "sentence_nr": 7 + "score": 0.30344371233327844, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.5545120254366757, - "sentence_nr": 7 + "score": 0.6219235056961488, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.1595487507830045, - "sentence_nr": 7 + "score": 0.21555378801920327, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.45111566089364774, - "sentence_nr": 7 + "score": 0.5577976700241679, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.44119978444380453, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.6860816819201474, + "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.16510868745008767, - "sentence_nr": 7 + "score": 0.18728674627858763, + "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.4762714594756596, - "sentence_nr": 7 + "score": 0.5182121492744396, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.27710310401156996, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.5216248191624099, - "sentence_nr": 7 + "score": 0.17601203382268035, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.16253030682894548, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.31575942061963186, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.16253030682894548, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.31575942061963186, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.303998162324503, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.5931856951819833, - "sentence_nr": 7 + "score": 0.21030548059060677, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.14495639555867468, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.2909086706553049, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.0921413422353044, + "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.40696144066138723, - "sentence_nr": 7 + "score": 0.5091224918749461, + "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.6476222098586478, - "sentence_nr": 7 + "score": 0.7202697992734389, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.39631066492420963, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.6670602127484115, - "sentence_nr": 7 + "score": 0.258900870705422, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 0.4063022828070774, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "score": 0.6789996206024372, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.2625805454451497, - "sentence_nr": 7 + "score": 0.3957399456352439, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.5886806140244891, - "sentence_nr": 7 + "score": 0.6505957913794083, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.36039626112317097, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.5998911326651838, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.41443024325505773, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.6439781798190682, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.06797010899515823, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.27154181329396565, - "sentence_nr": 7 + "score": 0.11217219041746629, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "bleu", - "score": 0.19568007857684672, - "sentence_nr": 7 + "score": 0.27571859863660825, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "or", + "task": "translation_from", "metric": "chrf", - "score": 0.5228407307909605, - "sentence_nr": 7 + "score": 0.5218771218644234, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.2879556779114461, - "sentence_nr": 7 + "score": 0.3984098807009828, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.4554184077174173, - "sentence_nr": 7 + "score": 0.636016958488394, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.09578921953028982, - "sentence_nr": 7 + "score": 0.3984098807009828, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.40472887922389433, - "sentence_nr": 7 + "score": 0.636016958488394, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.22965669823067916, - "sentence_nr": 7 + "score": 0.3984098807009828, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.46247819390492995, - "sentence_nr": 7 + "score": 0.636016958488394, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.08920952468433085, - "sentence_nr": 7 + "score": 0.3984098807009828, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.32241875701400735, - "sentence_nr": 7 + "score": 0.636016958488394, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.3984098807009828, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.636016958488394, + "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.2059931729749887, - "sentence_nr": 7 + "score": 0.3984098807009828, + "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.4491812480433597, - "sentence_nr": 7 + "score": 0.636016958488394, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.20475739007221866, - "sentence_nr": 7 + "score": 0.4174441728660793, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.3934874462686164, - "sentence_nr": 7 + "score": 0.6692136096184196, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.1719646079342664, - "sentence_nr": 7 + "score": 0.3984098807009828, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.308102700736633, - "sentence_nr": 7 + "score": 0.636016958488394, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.35174128537520233, - "sentence_nr": 7 + "score": 0.3984098807009828, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.5232532816160403, - "sentence_nr": 7 + "score": 0.636016958488394, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.11684343186914438, - "sentence_nr": 7 + "score": 0.39811631946890474, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.40293579310759836, - "sentence_nr": 7 + "score": 0.6320908834639722, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 0.4174441728660793, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "score": 0.6692136096184196, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.28255079601170635, - "sentence_nr": 7 + "score": 0.4441961115027302, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.4828223682720399, - "sentence_nr": 7 + "score": 0.7565542718609186, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 + "score": 0.4174441728660793, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.2023651649328507, - "sentence_nr": 7 + "score": 0.6692136096184196, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.1689706894436884, - "sentence_nr": 7 + "score": 0.18003301924565476, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.32609144958957464, - "sentence_nr": 7 + "score": 0.5051819537131238, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.38729516708438194, - "sentence_nr": 7 + "score": 0.4174441728660793, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.6571482446395243, - "sentence_nr": 7 + "score": 0.6706681340881337, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.32995628251235876, - "sentence_nr": 7 + "score": 0.39811631946890474, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.5982616321404195, - "sentence_nr": 7 + "score": 0.6320908834639722, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.4381454708258676, - "sentence_nr": 7 + "score": 0.3984098807009828, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.6163746220282033, - "sentence_nr": 7 + "score": 0.636016958488394, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.3614922712385951, - "sentence_nr": 7 + "score": 0.4186091892833126, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.5698489012763526, - "sentence_nr": 7 + "score": 0.6649304720642071, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.2572958792096885, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.5064333161464132, - "sentence_nr": 7 + "score": 0.17837875461384597, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation_from", "metric": "bleu", - "score": 0.4730742700342366, - "sentence_nr": 7 + "score": 0.3984098807009828, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation_from", "metric": "chrf", - "score": 0.6716818492415609, - "sentence_nr": 7 + "score": 0.636016958488394, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.41278042192714015, - "sentence_nr": 7 + "score": 0.17150296156301634, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.6060858750149657, - "sentence_nr": 7 + "score": 0.48812954881732445, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.5839068685770862, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.7498987134962192, - "sentence_nr": 7 + "score": 0.46076979395163187, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.568128598260769, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.7443891530963911, - "sentence_nr": 7 + "score": 0.39000168645396877, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "score": 0.35094536062899695, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.3915774240356112, - "sentence_nr": 7 + "score": 0.31569611706824424, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.6438987110697019, - "sentence_nr": 7 + "score": 0.5247546298443336, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.17827215716412181, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.31830622503514655, - "sentence_nr": 7 + "score": 0.45573768632726, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.3173241691310352, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.5910464434099775, - "sentence_nr": 7 + "score": 0.4794224895461657, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.3665528144045068, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.5966563047685359, - "sentence_nr": 7 + "score": 0.2744128100994792, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.2587297749908005, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.5132023401682766, - "sentence_nr": 7 + "score": 0.2744128100994792, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.2914897522509679, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.5714769597200869, - "sentence_nr": 7 + "score": 0.29898487912917937, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.11150937707712508, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.4170915413269471, - "sentence_nr": 7 + "score": 0.2556170391311767, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.11675747661776523, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.4301556378791635, - "sentence_nr": 7 + "score": 0.32927415290866546, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.20057225201358211, - "sentence_nr": 7 + "score": 0.402200098075857, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.47576367606491715, - "sentence_nr": 7 + "score": 0.6018817450865662, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.1078517242048809, - "sentence_nr": 7 + "score": 0.2978808647663532, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.3886262536746606, - "sentence_nr": 7 + "score": 0.5140749054213731, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.30250421218183904, - "sentence_nr": 7 + "score": 0.22848056414159593, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.5314198518425818, - "sentence_nr": 7 + "score": 0.5921402782211889, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.2674907183014193, - "sentence_nr": 7 + "score": 0.21511238963872098, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.5237435675958946, - "sentence_nr": 7 + "score": 0.5217348733264977, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 0.30376137001310205, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "score": 0.5156956617625708, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.21024692077841572, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.5403945194972577, - "sentence_nr": 7 + "score": 0.3377499269000448, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "translation_to", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 + "score": 0.1513630224364002, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "translation_to", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.026104354115338492, - "sentence_nr": 7 + "score": 0.32937492594263224, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "translation_to", + "bcp_47": "ha", + "task": "translation_from", "metric": "bleu", - "score": 0.18449230121441001, - "sentence_nr": 7 + "score": 0.18879521773374403, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "translation_to", + "bcp_47": "ha", + "task": "translation_from", "metric": "chrf", - "score": 0.40058346018376356, - "sentence_nr": 7 + "score": 0.4618333673677675, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.29894673648596126, - "sentence_nr": 7 + "score": 0.41682189465797687, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.6322815922673689, - "sentence_nr": 7 + "score": 0.6573099561830166, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.18330256089173447, - "sentence_nr": 7 + "score": 0.2852636439147137, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.5328391139635578, - "sentence_nr": 7 + "score": 0.5851048071392815, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.3478966138007723, - "sentence_nr": 7 + "score": 0.17636478563502966, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.666271052510266, - "sentence_nr": 7 + "score": 0.5283932773245016, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.21697301406549346, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.4876777357531764, - "sentence_nr": 7 + "score": 0.4203546552244347, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.48181149445310956, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.7675828789334244, + "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.29019765706301537, - "sentence_nr": 7 + "score": 0.34831550233828484, + "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.598596485843401, - "sentence_nr": 7 + "score": 0.6924946723825833, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.3151295371556651, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.6242597159052685, - "sentence_nr": 7 + "score": 0.29623686353922923, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.22907781804002908, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.22907781804002908, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.2672991324984635, - "sentence_nr": 7 + "score": 0.1196655750514248, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.6474323586139361, - "sentence_nr": 7 + "score": 0.29141398801197316, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.14025775160081475, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.3031509137265966, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.14085916416769417, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.25076903008051404, + "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.3486251970868468, - "sentence_nr": 7 + "score": 0.4814564802258215, + "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.6519074063738273, - "sentence_nr": 7 + "score": 0.7621649608882223, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.5047460217572859, - "sentence_nr": 7 + "score": 0.6917901740466924, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.6981561913726569, - "sentence_nr": 7 + "score": 0.8479928839177578, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 0.3088448141335011, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "score": 0.7035462512447451, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.2622053872435742, - "sentence_nr": 7 + "score": 0.4186091892833126, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.6045462235214704, - "sentence_nr": 7 + "score": 0.6393114196475629, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.18097539697008458, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.09841955325773799, - "sentence_nr": 7 + "score": 0.11856660123276004, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.3327723902928814, - "sentence_nr": 7 + "score": 0.3311682798096144, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "bleu", - "score": 0.2615651536220919, - "sentence_nr": 7 + "score": 0.3164257177669852, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "translation_to", + "bcp_47": "sd", + "task": "translation_from", "metric": "chrf", - "score": 0.5337436257798058, - "sentence_nr": 7 + "score": 0.5851860325042342, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.7416488036617811, - "sentence_nr": 7 + "score": 0.7511573912724299, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.9156308978596118, - "sentence_nr": 7 + "score": 0.9453473543978153, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.2924496936199556, - "sentence_nr": 7 + "score": 1.0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.6040522123603048, - "sentence_nr": 7 + "score": 1.0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.35611859459201994, - "sentence_nr": 7 + "score": 1.0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.6591871481895288, - "sentence_nr": 7 + "score": 1.0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.3092067388523221, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.6104451101668408, - "sentence_nr": 7 + "score": 0.3263040636562357, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.46290680573161996, - "sentence_nr": 7 + "score": 0.7511573912724299, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.688869111662782, - "sentence_nr": 7 + "score": 0.9453473543978153, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.5295672450222603, - "sentence_nr": 7 + "score": 1.0, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.715157413474444, - "sentence_nr": 7 + "score": 1.0, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.33262718496001725, - "sentence_nr": 7 + "score": 0.7511573912724299, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.6707552233208028, - "sentence_nr": 7 + "score": 0.9453473543978153, + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.5995826896426277, - "sentence_nr": 7 + "score": 0.7511573912724299, + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.7943692036315023, - "sentence_nr": 7 + "score": 0.9453473543978153, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.6176355987862611, - "sentence_nr": 7 + "score": 0.7511573912724299, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.7898758502538201, - "sentence_nr": 7 + "score": 0.9453473543978153, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 0.7511573912724299, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "score": 0.9453473543978153, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.4475966481812816, - "sentence_nr": 7 + "score": 0.5971070986250356, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.6848118022736988, - "sentence_nr": 7 + "score": 0.8874294965619517, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 + "score": 0.7511573912724299, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.33625310520541907, - "sentence_nr": 7 + "score": 0.9453473543978153, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.4899679589833683, - "sentence_nr": 7 + "score": 0.7511573912724299, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.6996203149315261, - "sentence_nr": 7 + "score": 0.9453473543978153, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "translation_to", - "metric": "bleu", - "score": 0.23119301671666287, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "translation_to", - "metric": "chrf", - "score": 0.354782287640505, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", - "metric": "bleu", - "score": 0.20419333453691463, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", - "metric": "chrf", - "score": 0.39470297247688435, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", - "metric": "bleu", - "score": 0.14599223028360678, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", - "metric": "chrf", - "score": 0.28718685195806315, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", - "metric": "chrf", - "score": 0.0680779227699037, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", - "task": "translation_to", - "metric": "bleu", - "score": 0.18386904980839383, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", - "task": "translation_to", - "metric": "chrf", - "score": 0.3263831403119468, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "translation_to", - "metric": "bleu", - "score": 0.2995728063785384, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "translation_to", - "metric": "chrf", - "score": 0.41824297302824903, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "translation_to", - "metric": "bleu", - "score": 0.1948950171081147, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "translation_to", - "metric": "chrf", - "score": 0.3579044902117876, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.2409168844747761, - "sentence_nr": 7 + "score": 0.7511573912724299, + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.3760904447135035, - "sentence_nr": 7 + "score": 0.9453473543978153, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.2409168844747761, - "sentence_nr": 7 + "score": 0.7511573912724299, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.34625648713313856, - "sentence_nr": 7 + "score": 0.9453473543978153, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.25170309939120067, - "sentence_nr": 7 + "score": 0.7511573912724299, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.3899115496810816, - "sentence_nr": 7 + "score": 0.9453473543978153, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.2646814749718951, - "sentence_nr": 7 + "score": 0.7511573912724299, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.38312949443875044, - "sentence_nr": 7 + "score": 0.9453473543978153, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 7 + "score": 1.0, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.00882086689569064, - "sentence_nr": 7 + "score": 1.0, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.1753792879326568, - "sentence_nr": 7 + "score": 0.7511573912724299, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.3388058023792196, - "sentence_nr": 7 + "score": 0.9453473543978153, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.348007986647201, - "sentence_nr": 8 + "score": 0.3390387389794623, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.6148736550683231, - "sentence_nr": 8 + "score": 0.6170420596680538, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.31222258402876674, - "sentence_nr": 8 + "score": 0.3142665434344143, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.5549937870516303, - "sentence_nr": 8 + "score": 0.6466526067220029, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.2706573913259733, - "sentence_nr": 8 + "score": 0.3751840463233443, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.5619563043714905, - "sentence_nr": 8 + "score": 0.6279894552667558, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.21331098311931576, - "sentence_nr": 8 + "score": 0.19268479640608693, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.47660259733052845, - "sentence_nr": 8 + "score": 0.551397074868541, + "sentence_nr": 6 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.19803162353826262, - "sentence_nr": 8 + "score": 0.19464521962073492, + "sentence_nr": 6 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.40808208228398596, - "sentence_nr": 8 + "score": 0.5838790966762375, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.22816849039973935, - "sentence_nr": 8 + "score": 0.19464521962073492, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.5295534280606148, - "sentence_nr": 8 + "score": 0.5838790966762375, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.2320305803246989, - "sentence_nr": 8 + "score": 0.19464521962073492, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.5719371199531044, - "sentence_nr": 8 + "score": 0.5838790966762375, + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.3407563025626974, - "sentence_nr": 8 + "score": 0.4797543511401896, + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.6182648747061787, - "sentence_nr": 8 + "score": 0.7571314915469349, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.4797543511401896, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.5196627001050362, - "sentence_nr": 8 + "score": 0.7571314915469349, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.18627639656696823, - "sentence_nr": 8 + "score": 0.17470942957770763, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.44356601067804086, - "sentence_nr": 8 + "score": 0.5403400891349619, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.27048170758554296, - "sentence_nr": 8 + "score": 0.19268479640608693, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.5452157067944216, - "sentence_nr": 8 + "score": 0.5436964586887773, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.19464521962073492, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.3324437360240581, - "sentence_nr": 8 + "score": 0.5763410052067085, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.3060368950930089, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.3472164938104332, - "sentence_nr": 8 + "score": 0.6498981440676681, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.23683075175361493, - "sentence_nr": 8 + "score": 0.19464521962073492, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.2631328190836655, - "sentence_nr": 8 + "score": 0.5838790966762375, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "google/gemma-3-27b-it", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.19464521962073492, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "google/gemma-3-27b-it", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.16455392433653304, - "sentence_nr": 8 + "score": 0.5763410052067085, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.19464521962073492, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.13673885815184886, - "sentence_nr": 8 + "score": 0.5838790966762375, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "deepseek/deepseek-chat", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.19464521962073492, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "deepseek/deepseek-chat", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.052821402483564636, - "sentence_nr": 8 + "score": 0.5838790966762375, + "sentence_nr": 6 }, { - "model": "openai/gpt-4.1-nano", + "model": "microsoft/phi-4", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.17470942957770763, + "sentence_nr": 6 }, { - "model": "openai/gpt-4.1-nano", + "model": "microsoft/phi-4", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.13492461680840023, - "sentence_nr": 8 + "score": 0.581881475681913, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", + "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.24343304284910333, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", + "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.10721126066665879, - "sentence_nr": 8 + "score": 0.6275577931282961, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "amazon/nova-micro-v1", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.28571962561926445, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "amazon/nova-micro-v1", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.20388486867467934, - "sentence_nr": 8 + "score": 0.6431872581462166, + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.1621568294816267, - "sentence_nr": 8 + "score": 0.5014756677893482, + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.2655543079691671, - "sentence_nr": 8 + "score": 0.7958858211784339, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.6255340042200862, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.14973178994918127, - "sentence_nr": 8 + "score": 0.8724783049357475, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.5014756677893482, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.1337840368142243, - "sentence_nr": 8 + "score": 0.7958858211784339, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.3083012995502152, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.2143764616947716, - "sentence_nr": 8 + "score": 0.6589376390020449, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.43021236941942204, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.014262006975939606, - "sentence_nr": 8 + "score": 0.7142896582178452, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.43021236941942204, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.11557977235371186, - "sentence_nr": 8 + "score": 0.7142896582178452, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", + "model": "openai/gpt-4o-mini", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.461597801606675, - "sentence_nr": 8 + "score": 0.4216890913810254, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", + "model": "openai/gpt-4o-mini", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6280777654467244, - "sentence_nr": 8 + "score": 0.6885217194158456, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4224298950114519, - "sentence_nr": 8 + "score": 0.4216890913810254, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.60823085524287, - "sentence_nr": 8 + "score": 0.6885217194158456, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "openai/gpt-3.5-turbo", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3916177035633811, - "sentence_nr": 8 + "score": 0.4216890913810254, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "openai/gpt-3.5-turbo", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6068458202737596, - "sentence_nr": 8 + "score": 0.6885217194158456, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.30451258861070496, - "sentence_nr": 8 + "score": 0.3083012995502152, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4983778740634126, - "sentence_nr": 8 + "score": 0.6589376390020449, + "sentence_nr": 6 }, { - "model": "openai/gpt-4.1-nano", + "model": "mistralai/mistral-saba", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3916177035633811, - "sentence_nr": 8 + "score": 0.3390387389794623, + "sentence_nr": 6 }, { - "model": "openai/gpt-4.1-nano", + "model": "mistralai/mistral-saba", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6068458202737596, - "sentence_nr": 8 + "score": 0.6743433920357993, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", + "model": "mistralai/mistral-nemo", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.36033217429111203, - "sentence_nr": 8 + "score": 0.30166728533047465, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", + "model": "mistralai/mistral-nemo", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5707860320039717, - "sentence_nr": 8 + "score": 0.6331697020750404, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3916177035633811, - "sentence_nr": 8 + "score": 0.6255340042200862, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6068458202737596, - "sentence_nr": 8 + "score": 0.8724783049357475, + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3916177035633811, - "sentence_nr": 8 + "score": 0.5351607207631776, + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6068458202737596, - "sentence_nr": 8 + "score": 0.7900844096102096, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.45886678012586496, - "sentence_nr": 8 + "score": 0.29176300840900793, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6234514801756209, - "sentence_nr": 8 + "score": 0.6143650111703199, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 8 + "score": 0.5014756677893482, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 + "score": 0.7958858211784339, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3916177035633811, - "sentence_nr": 8 + "score": 0.5014756677893482, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6068458202737596, - "sentence_nr": 8 + "score": 0.7958858211784339, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.3825188992413085, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.7407084022031544, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.15465401249808575, - "sentence_nr": 8 + "score": 0.24090844358935917, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.42501995363729067, - "sentence_nr": 8 + "score": 0.5468852870478801, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.30004556274899286, - "sentence_nr": 8 + "score": 0.6255340042200862, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.561482333900969, - "sentence_nr": 8 + "score": 0.8724783049357475, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.48994561421713123, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.36769040719718776, - "sentence_nr": 8 + "score": 0.7411155087367244, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.48994561421713123, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4064141882459388, - "sentence_nr": 8 + "score": 0.7411155087367244, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.32365795029773287, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.34722897369611144, - "sentence_nr": 8 + "score": 0.6509517796070665, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.32365795029773287, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4103553163121394, - "sentence_nr": 8 + "score": 0.6509517796070665, + "sentence_nr": 6 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.32365795029773287, + "sentence_nr": 6 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.37440084690294706, - "sentence_nr": 8 + "score": 0.6509517796070665, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.4481489512240194, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4141871474340027, - "sentence_nr": 8 + "score": 0.7745649676018984, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.32365795029773287, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.36586001924521905, - "sentence_nr": 8 + "score": 0.6509517796070665, + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.48994561421713123, + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.35316499124143624, - "sentence_nr": 8 + "score": 0.7411155087367244, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.48994561421713123, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.44328515185259987, - "sentence_nr": 8 + "score": 0.7411155087367244, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.32365795029773287, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.2870169689559038, - "sentence_nr": 8 + "score": 0.6509517796070665, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-saba", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.4896430866960958, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-saba", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.37150797394258683, - "sentence_nr": 8 + "score": 0.7638521785649908, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "mistralai/mistral-nemo", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.48994561421713123, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "mistralai/mistral-nemo", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.006130367300589213, - "sentence_nr": 8 + "score": 0.7411155087367244, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.32365795029773287, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.29038853710161877, - "sentence_nr": 8 + "score": 0.6509517796070665, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.32365795029773287, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6509517796070665, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.48994561421713123, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.7411155087367244, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.48994561421713123, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.7411155087367244, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.32365795029773287, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6509517796070665, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.32365795029773287, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6509517796070665, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.32365795029773287, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6509517796070665, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3286711939680359, - "sentence_nr": 8 + "score": 0.5971070986250356, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5944310794747374, - "sentence_nr": 8 + "score": 0.8874294965619517, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.13547277341758465, - "sentence_nr": 8 + "score": 0.5971070986250356, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4830189619506113, - "sentence_nr": 8 + "score": 0.8874294965619517, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.22970092088416938, - "sentence_nr": 8 + "score": 0.5971070986250356, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5537467826528029, - "sentence_nr": 8 + "score": 0.8874294965619517, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.13547277341758465, - "sentence_nr": 8 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4685134392551311, - "sentence_nr": 8 + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3359695440470467, - "sentence_nr": 8 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3096036988813059, - "sentence_nr": 8 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5894510883198948, - "sentence_nr": 8 + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.14957644445778928, - "sentence_nr": 8 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4378856092523028, - "sentence_nr": 8 + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.38513414673376833, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.7005713730032203, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.437742810290776, - "sentence_nr": 8 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.621154967713859, - "sentence_nr": 8 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.22481074167380632, - "sentence_nr": 8 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.49840634234674935, - "sentence_nr": 8 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 8 + "score": 0.5971070986250356, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 + "score": 0.8874294965619517, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.26751157705127454, - "sentence_nr": 8 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5494472552960327, - "sentence_nr": 8 + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.39545121937832856, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.0066610108556241394, - "sentence_nr": 8 + "score": 0.6963801389253689, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.22536453058221606, - "sentence_nr": 8 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ar", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4365811373563711, - "sentence_nr": 8 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2822535302220024, - "sentence_nr": 8 + "score": 0.4101479464529936, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3874773378787974, - "sentence_nr": 8 + "score": 0.7041976254287654, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.31747697264511426, - "sentence_nr": 8 + "score": 0.4547900039222725, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.40797778663955364, - "sentence_nr": 8 + "score": 0.6541971428810075, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2822535302220024, - "sentence_nr": 8 + "score": 0.2919394073770869, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3818556455365969, - "sentence_nr": 8 + "score": 0.5957961314949175, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2822535302220024, - "sentence_nr": 8 + "score": 0.17537670874647399, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3785761836985817, - "sentence_nr": 8 + "score": 0.4800889669735933, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.3382865300192028, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.6408187443698572, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.28336087141473976, - "sentence_nr": 8 + "score": 0.3347189874003768, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.38214734777528636, - "sentence_nr": 8 + "score": 0.6644203374869264, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.23841754841770157, - "sentence_nr": 8 + "score": 0.5088645484558708, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.34481325534410395, - "sentence_nr": 8 + "score": 0.6991726442472661, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.1684221470406417, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.5314740727463538, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.38416065466563115, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.7278436878265722, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2822535302220024, - "sentence_nr": 8 + "score": 0.22845493240080628, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3818556455365969, - "sentence_nr": 8 + "score": 0.584996891148118, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.38416065466563115, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.7278436878265722, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.18830095106396066, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.519072776559512, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3266011589665084, - "sentence_nr": 8 + "score": 0.5383680940297331, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4364077249430218, - "sentence_nr": 8 + "score": 0.786096406361039, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.24796413807329218, - "sentence_nr": 8 + "score": 0.5088645484558708, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3530186228211094, - "sentence_nr": 8 + "score": 0.6991726442472661, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.09821019441701705, - "sentence_nr": 8 + "score": 0.5088645484558708, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.274825378700542, - "sentence_nr": 8 + "score": 0.6991726442472661, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2807763229912453, - "sentence_nr": 8 + "score": 0.38785611216800814, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.38395145132718883, - "sentence_nr": 8 + "score": 0.6673259967761724, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.4547900039222725, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.6556658100891058, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.3675667565747676, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.5884916014203391, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.07218766113019179, - "sentence_nr": 8 + "score": 0.16331948281960493, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.24018250025773352, - "sentence_nr": 8 + "score": 0.350650198151987, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.8056920633274978, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ur", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.2096419313570871, - "sentence_nr": 8 + "score": 0.8391519966182309, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.46935933364934335, - "sentence_nr": 8 + "score": 0.4284945090100314, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.773055573548356, - "sentence_nr": 8 + "score": 0.7164026439677106, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.43368945552925614, - "sentence_nr": 8 + "score": 0.23198210427894825, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.640995178057518, - "sentence_nr": 8 + "score": 0.630711601223299, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3681829215408091, - "sentence_nr": 8 + "score": 0.23198210427894825, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6273930299436508, - "sentence_nr": 8 + "score": 0.630711601223299, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4389321784429702, - "sentence_nr": 8 + "score": 0.23198210427894825, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.646847036932526, - "sentence_nr": 8 + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.13857910426205777, - "sentence_nr": 8 + "score": 0.23198210427894825, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.41268771676068494, - "sentence_nr": 8 + "score": 0.630711601223299, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.43368945552925614, - "sentence_nr": 8 + "score": 0.23198210427894825, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6793717376740783, - "sentence_nr": 8 + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3595137194874952, - "sentence_nr": 8 + "score": 0.23198210427894825, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5619162673780028, - "sentence_nr": 8 + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.38694317759010316, - "sentence_nr": 8 + "score": 0.23198210427894825, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6962124663194352, - "sentence_nr": 8 + "score": 0.630711601223299, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.43368945552925614, - "sentence_nr": 8 + "score": 0.23198210427894825, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6793717376740783, - "sentence_nr": 8 + "score": 0.630711601223299, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.23198210427894825, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.2927181624015055, - "sentence_nr": 8 + "score": 0.630711601223299, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.43383878173729606, - "sentence_nr": 8 + "score": 0.23198210427894825, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.721993849834018, - "sentence_nr": 8 + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.23198210427894825, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.007378883018336222, - "sentence_nr": 8 + "score": 0.630711601223299, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.23198210427894825, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.16440791304482247, - "sentence_nr": 8 + "score": 0.630711601223299, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3160213610127146, - "sentence_nr": 8 + "score": 0.595092211343687, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5165614670038283, - "sentence_nr": 8 + "score": 0.7971172820981081, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.30758744700466467, - "sentence_nr": 8 + "score": 0.4831233610237384, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4684197705189288, - "sentence_nr": 8 + "score": 0.7122562458056777, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.37169237058440824, - "sentence_nr": 8 + "score": 0.4831233610237384, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5383668331525606, - "sentence_nr": 8 + "score": 0.7122562458056777, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.18655267161524258, - "sentence_nr": 8 + "score": 0.4207937380724192, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3640275543948514, - "sentence_nr": 8 + "score": 0.6985308026285912, + "sentence_nr": 6 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.18384948243517193, - "sentence_nr": 8 + "score": 0.651158213392685, + "sentence_nr": 6 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.40935288248313256, - "sentence_nr": 8 + "score": 0.860002370506267, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.20050320605789015, - "sentence_nr": 8 + "score": 0.42984824697674956, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4046291070099031, - "sentence_nr": 8 + "score": 0.7369844404912368, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.14579837024705408, - "sentence_nr": 8 + "score": 0.48994561421713123, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3459916112351503, - "sentence_nr": 8 + "score": 0.8020845125558708, + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.31638337148949686, - "sentence_nr": 8 + "score": 0.2308087288583725, + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5386981918746817, - "sentence_nr": 8 + "score": 0.5837900289346171, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2761603007895394, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.49207696507318593, - "sentence_nr": 8 + "score": 0.5110827761016288, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.42984824697674956, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.20630721151497294, - "sentence_nr": 8 + "score": 0.7369844404912368, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-saba", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.31518520840312125, - "sentence_nr": 8 + "score": 0.42984824697674956, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-saba", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5072004558983904, - "sentence_nr": 8 + "score": 0.7369844404912368, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "mistralai/mistral-nemo", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.27571859863660825, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "mistralai/mistral-nemo", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.004516711833785005, - "sentence_nr": 8 + "score": 0.5884245635756674, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.16951909200513385, - "sentence_nr": 8 + "score": 0.595092211343687, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.7945212279546889, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.4831233610237384, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.7122562458056777, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.3675667565747676, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.5700185304500285, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.43011383006801057, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.7140577175386648, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.42984824697674956, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.7369844404912368, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.18728674627858763, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.5160823886915287, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3843363395779093, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "bn", - "task": "translation_to", + "task": "translation_from", + "metric": "bleu", + "score": 0.42984824697674956, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.3380125247643079, - "sentence_nr": 8 + "score": 0.7369844404912368, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2961516536011624, - "sentence_nr": 8 + "score": 0.5971070986250356, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.49803924348035766, - "sentence_nr": 8 + "score": 0.8874294965619517, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3459667618766101, - "sentence_nr": 8 + "score": 0.32263864160302524, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6170810606402402, - "sentence_nr": 8 + "score": 0.6824395076981005, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.23623790626704147, - "sentence_nr": 8 + "score": 0.4896430866960958, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5441149448679464, - "sentence_nr": 8 + "score": 0.7719180936906627, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.35936994872479583, - "sentence_nr": 8 + "score": 0.32365795029773287, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6492026440953677, - "sentence_nr": 8 + "score": 0.6590438071804039, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.32263864160302524, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.6824395076981005, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.1280220256953781, - "sentence_nr": 8 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3766998614914371, - "sentence_nr": 8 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4161791450287817, - "sentence_nr": 8 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7054426787013603, - "sentence_nr": 8 + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3254455687469726, - "sentence_nr": 8 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.577852219465442, - "sentence_nr": 8 + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.4896430866960958, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.7719180936906627, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.33667089470100775, - "sentence_nr": 8 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6074301230157959, - "sentence_nr": 8 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.39688965270008814, - "sentence_nr": 8 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.646373332434726, - "sentence_nr": 8 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 8 + "score": 0.5971070986250356, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 + "score": 0.8874294965619517, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4272870063962341, - "sentence_nr": 8 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6682855797405902, - "sentence_nr": 8 + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.4481489512240194, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.008777992747819234, - "sentence_nr": 8 + "score": 0.7745649676018984, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.12288887055424895, - "sentence_nr": 8 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pt", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4148619356639114, - "sentence_nr": 8 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.46832763312452297, - "sentence_nr": 8 + "score": 0.5383680940297331, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.8176110134774669, - "sentence_nr": 8 + "score": 0.786096406361039, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.9436043261706615, - "sentence_nr": 8 + "score": 0.38305978177479755, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.9880191679951993, - "sentence_nr": 8 + "score": 0.6061131723054572, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 8 + "score": 0.34636800712900173, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 8 + "score": 0.5167955767158704, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.3675667565747676, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.0067104198717751464, - "sentence_nr": 8 + "score": 0.5397693417183738, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.4803501444747088, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.7417101158248365, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 8 + "score": 0.421151249507493, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 8 + "score": 0.6889522290200047, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 8 + "score": 0.5383680940297331, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 8 + "score": 0.786096406361039, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.38687573986922297, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.6607831957682522, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.38687573986922297, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.6607831957682522, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.9025232868361638, - "sentence_nr": 8 + "score": 0.425143650778693, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.9169897590736298, - "sentence_nr": 8 + "score": 0.6674242019044293, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.5383680940297331, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.786096406361039, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.4803501444747088, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.7417101158248365, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 8 + "score": 0.5383680940297331, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 8 + "score": 0.786096406361039, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.9709835434146469, - "sentence_nr": 8 + "score": 0.4803501444747088, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.9951728990866464, - "sentence_nr": 8 + "score": 0.7417101158248365, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 8 + "score": 0.421151249507493, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 + "score": 0.6938674571170766, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.9154051169199643, - "sentence_nr": 8 + "score": 0.5383680940297331, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.9757471794927451, - "sentence_nr": 8 + "score": 0.786096406361039, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.41368954504257266, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.6745746194667386, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.421151249507493, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.6889522290200047, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.8935248372106969, - "sentence_nr": 8 + "score": 0.41843795218458035, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.9404428602061264, - "sentence_nr": 8 + "score": 0.6316283876832989, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 8 + "score": 0.4803501444747088, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 8 + "score": 0.7417101158248365, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4593546097889176, - "sentence_nr": 8 + "score": 0.42221847853238736, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.608602146246901, - "sentence_nr": 8 + "score": 0.6656008733100179, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.23386786214190372, - "sentence_nr": 8 + "score": 0.24047860794644352, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.46662929903381617, - "sentence_nr": 8 + "score": 0.58198979036704, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.16341242314728613, - "sentence_nr": 8 + "score": 0.24047860794644352, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.40072549318878165, - "sentence_nr": 8 + "score": 0.58198979036704, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.13952118378975725, - "sentence_nr": 8 + "score": 0.24047860794644352, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4030284875466178, - "sentence_nr": 8 + "score": 0.58198979036704, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.24047860794644352, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.58198979036704, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.24047860794644352, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.38985048513980286, - "sentence_nr": 8 + "score": 0.58198979036704, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2830789070123405, - "sentence_nr": 8 + "score": 0.24047860794644352, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.41858897147271634, - "sentence_nr": 8 + "score": 0.58198979036704, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.24047860794644352, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.58198979036704, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.24047860794644352, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.58198979036704, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.1958598294695433, - "sentence_nr": 8 + "score": 0.24047860794644352, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.43120286814245795, - "sentence_nr": 8 + "score": 0.58198979036704, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.24047860794644352, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.58198979036704, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.38513414673376833, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.7005713730032203, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4489929043142396, - "sentence_nr": 8 + "score": 0.24047860794644352, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6339860404289296, - "sentence_nr": 8 + "score": 0.58198979036704, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4844328956731527, - "sentence_nr": 8 + "score": 0.24047860794644352, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6798474086331312, - "sentence_nr": 8 + "score": 0.58198979036704, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 8 + "score": 0.24047860794644352, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 + "score": 0.58198979036704, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.24305650182597577, - "sentence_nr": 8 + "score": 0.24047860794644352, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.46912278832283355, - "sentence_nr": 8 + "score": 0.58198979036704, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.24047860794644352, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.58198979036704, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.24047860794644352, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.58198979036704, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.24047860794644352, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.0064546295242688114, - "sentence_nr": 8 + "score": 0.58198979036704, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.24047860794644352, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ru", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.36092853787943247, - "sentence_nr": 8 + "score": 0.58198979036704, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3397180516736864, - "sentence_nr": 8 + "score": 0.4481489512240194, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6494995648532881, - "sentence_nr": 8 + "score": 0.7994721822064033, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.23811989337799513, - "sentence_nr": 8 + "score": 0.7511573912724299, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4812080785035883, - "sentence_nr": 8 + "score": 0.9453473543978153, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.23168799483443045, - "sentence_nr": 8 + "score": 0.7511573912724299, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5252672120228886, - "sentence_nr": 8 + "score": 0.9453473543978153, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2889285495431631, - "sentence_nr": 8 + "score": 0.38754077501151757, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4536746865348185, - "sentence_nr": 8 + "score": 0.598503332887995, + "sentence_nr": 6 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.17268932789342512, - "sentence_nr": 8 + "score": 0.5124776602965491, + "sentence_nr": 6 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.47806194925419343, - "sentence_nr": 8 + "score": 0.7722874800637285, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.42067720018268145, - "sentence_nr": 8 + "score": 0.3942058093215873, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6309653612961436, - "sentence_nr": 8 + "score": 0.6619197609807324, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.24821926635843994, - "sentence_nr": 8 + "score": 0.5124776602965491, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.40551062972908847, - "sentence_nr": 8 + "score": 0.7722874800637285, + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.42382856047421374, - "sentence_nr": 8 + "score": 0.5037141288692649, + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6321659688090209, - "sentence_nr": 8 + "score": 0.7451862558478656, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.16310121952537132, - "sentence_nr": 8 + "score": 0.5037141288692649, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5625465668278802, - "sentence_nr": 8 + "score": 0.7451862558478656, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 8 + "score": 0.3291598889023262, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 + "score": 0.6085546680624175, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-saba", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4126152034907945, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-saba", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6344543163574141, - "sentence_nr": 8 + "score": 0.23551878211683616, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.3575980447629419, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.6173766800527999, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.857390040146912, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.6173766800527999, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.857390040146912, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.6173766800527999, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.857390040146912, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.6173766800527999, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.857390040146912, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.6173766800527999, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.857390040146912, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.4481489512240194, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.7994721822064033, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 8 + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.0018234865061998542, - "sentence_nr": 8 + "score": 0.3470839302425112, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.4845766087853281, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "sw", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3722685688714949, - "sentence_nr": 8 + "score": 0.7138566289355139, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.255918614113723, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.44318862516624546, - "sentence_nr": 8 + "score": 0.5512324461754572, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.35285733014385007, - "sentence_nr": 8 + "score": 0.42984824697674956, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5945202859296662, - "sentence_nr": 8 + "score": 0.7289444696770301, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.3737098172408067, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.34940338846112967, - "sentence_nr": 8 + "score": 0.6832201170000932, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2506708132952771, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5592183664602846, - "sentence_nr": 8 + "score": 0.5582775802710993, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.5582775802710993, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.1528571341245854, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5481360183906401, - "sentence_nr": 8 + "score": 0.5582775802710993, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.22166358657237664, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.45834104234305023, - "sentence_nr": 8 + "score": 0.5582775802710993, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.5582775802710993, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.5582775802710993, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.22478920073209205, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4500155517039222, - "sentence_nr": 8 + "score": 0.5582775802710993, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.4481489512240194, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.8268199262002004, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.39159269732992946, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.7803080344509848, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.37219737664729546, - "sentence_nr": 8 + "score": 0.22436571657855092, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6020217844686402, - "sentence_nr": 8 + "score": 0.61166969974579, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.35285733014385007, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5945202859296662, - "sentence_nr": 8 + "score": 0.5512324461754572, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2297888516430291, - "sentence_nr": 8 + "score": 0.22436571657855092, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.44601363908967323, - "sentence_nr": 8 + "score": 0.61166969974579, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.22478920073209205, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.46169108941910525, - "sentence_nr": 8 + "score": 0.5582775802710993, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "deepseek/deepseek-chat", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 8 + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.5582775802710993, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.5582775802710993, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", + "metric": "bleu", + "score": 0.22423870508323301, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_from", "metric": "chrf", - "score": 0.005304235332926387, - "sentence_nr": 8 + "score": 0.6366515193698862, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 8 + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "id", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3330162771465545, - "sentence_nr": 8 + "score": 0.5582775802710993, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2809009542151822, - "sentence_nr": 8 + "score": 0.6507561416639396, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5819888906713027, - "sentence_nr": 8 + "score": 0.8215788698315908, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4573889291137309, - "sentence_nr": 8 + "score": 0.4881010344921759, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6974989991762017, - "sentence_nr": 8 + "score": 0.7317734491561229, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5409314026600619, - "sentence_nr": 8 + "score": 0.4881010344921759, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.8023475129738281, - "sentence_nr": 8 + "score": 0.7317734491561229, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.11372027710077005, - "sentence_nr": 8 + "score": 0.6507561416639396, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.43980817368282343, - "sentence_nr": 8 + "score": 0.8215788698315908, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.4881010344921759, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.7317734491561229, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3323217739558646, - "sentence_nr": 8 + "score": 0.5967384019266717, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.65584145837095, - "sentence_nr": 8 + "score": 0.8544348080833218, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2460137257692754, - "sentence_nr": 8 + "score": 0.4881010344921759, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5397894338370378, - "sentence_nr": 8 + "score": 0.7317734491561229, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2745762486209681, - "sentence_nr": 8 + "score": 0.4881010344921759, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5421002898382512, - "sentence_nr": 8 + "score": 0.7317734491561229, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.4881010344921759, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.7317734491561229, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.4896430866960958, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.7638521785649908, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3961867597457338, - "sentence_nr": 8 + "score": 0.4881010344921759, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.654720368848453, - "sentence_nr": 8 + "score": 0.7317734491561229, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4717991357336539, - "sentence_nr": 8 + "score": 0.4881010344921759, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7489646628366208, - "sentence_nr": 8 + "score": 0.7317734491561229, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 8 + "score": 0.7511573912724299, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 + "score": 0.9453473543978153, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5170969057682974, - "sentence_nr": 8 + "score": 0.4881010344921759, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7999241778608444, - "sentence_nr": 8 + "score": 0.7317734491561229, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.4881010344921759, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.7317734491561229, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.4881010344921759, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.7317734491561229, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.251696695878184, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.006260653272080335, - "sentence_nr": 8 + "score": 0.6180491939580447, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.17193972960972626, - "sentence_nr": 8 + "score": 0.5967384019266717, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "de", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.50925856841751, - "sentence_nr": 8 + "score": 0.8544348080833218, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.2719326877457978, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3487575221722675, - "sentence_nr": 8 + "score": 0.6002086362682414, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.15824382329465247, - "sentence_nr": 8 + "score": 0.5971070986250356, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.2847034639706718, - "sentence_nr": 8 + "score": 0.8874294965619517, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.1307655887510901, - "sentence_nr": 8 + "score": 0.4284945090100314, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.2688265704976335, - "sentence_nr": 8 + "score": 0.7246227738353674, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.29170205300854224, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.010715460821011002, - "sentence_nr": 8 + "score": 0.6498499527552988, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.29170205300854224, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.6498499527552988, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.29170205300854224, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3235165184655995, - "sentence_nr": 8 + "score": 0.6498499527552988, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.29170205300854224, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.27185330211646, - "sentence_nr": 8 + "score": 0.6498499527552988, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.4284945090100314, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.7211812032548905, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.4284945090100314, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.7211812032548905, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.31671615012203974, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.2690830377349408, - "sentence_nr": 8 + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.29170205300854224, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.6498499527552988, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.22537412722674852, - "sentence_nr": 8 + "score": 0.4284945090100314, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3609019979890711, - "sentence_nr": 8 + "score": 0.7246227738353674, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.31671615012203974, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.28534353976384025, - "sentence_nr": 8 + "score": 0.6782734900436637, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.28592291256793106, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.19271102520768202, - "sentence_nr": 8 + "score": 0.6102727682426059, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.31671615012203974, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.3383777404070013, - "sentence_nr": 8 + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.29170205300854224, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.6498499527552988, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.29170205300854224, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.6498499527552988, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.2748202507307579, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.010275038134729863, - "sentence_nr": 8 + "score": 0.5810363959809548, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.28571962561926445, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ja", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.29676390087816046, - "sentence_nr": 8 + "score": 0.693456244639743, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.6031612036218008, - "sentence_nr": 9 + "score": 0.42984824697674956, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.736286703381354, - "sentence_nr": 9 + "score": 0.7395804946242599, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.39432344823662835, - "sentence_nr": 9 + "score": 0.3684981984538114, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.5943452555220106, - "sentence_nr": 9 + "score": 0.5606332518476288, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.34437686643287496, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.6090402109312658, - "sentence_nr": 9 + "score": 0.3694816688798906, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.39205580893266934, - "sentence_nr": 9 + "score": 0.1423071532720465, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.6772940233934857, - "sentence_nr": 9 + "score": 0.5234276250101042, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.24456656109396324, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.6532234058412462, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.34303589686600006, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.6476809000259773, - "sentence_nr": 9 + "score": 0.5432206404657683, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.43103580001357805, - "sentence_nr": 9 + "score": 0.32365795029773287, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.6690742226623104, - "sentence_nr": 9 + "score": 0.7121135616759211, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.478854281434795, - "sentence_nr": 9 + "score": 0.17855149299161602, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.6734455797843703, - "sentence_nr": 9 + "score": 0.4966305184761827, + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.4330463947479356, - "sentence_nr": 9 + "score": 0.23418123261847468, + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.6198346106486459, - "sentence_nr": 9 + "score": 0.5587864404094985, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.2981426768485538, - "sentence_nr": 9 + "score": 0.48994561421713123, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.5882799317365235, - "sentence_nr": 9 + "score": 0.8020845125558708, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.22739562220830442, - "sentence_nr": 9 + "score": 0.43373675317548144, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.49805301036023364, - "sentence_nr": 9 + "score": 0.7025336737339543, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.4263005628892719, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.6688425476017256, - "sentence_nr": 9 + "score": 0.36802977705844575, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.4579102348988084, - "sentence_nr": 9 + "score": 0.42984824697674956, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.6417119032346416, - "sentence_nr": 9 + "score": 0.7369844404912368, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.6252078221435556, - "sentence_nr": 9 + "score": 0.42984824697674956, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.7406162627381982, - "sentence_nr": 9 + "score": 0.7369844404912368, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.11254397891886614, - "sentence_nr": 9 + "score": 0.42984824697674956, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.20623288988983426, - "sentence_nr": 9 + "score": 0.7395804946242599, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.1100081929352474, - "sentence_nr": 9 + "score": 0.20826058354833846, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.18967061672400035, - "sentence_nr": 9 + "score": 0.5799650985757929, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.10772332006118607, - "sentence_nr": 9 + "score": 0.42984824697674956, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.23609036869909603, - "sentence_nr": 9 + "score": 0.7395804946242599, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.1498435848533153, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.21051700087939107, - "sentence_nr": 9 + "score": 0.43664808203158506, + "sentence_nr": 6 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.13093265020876002, - "sentence_nr": 9 + "score": 0.294467310498826, + "sentence_nr": 6 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.24423594551873207, - "sentence_nr": 9 + "score": 0.4554141323944355, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation_from", "metric": "bleu", - "score": 0.10772332006118607, - "sentence_nr": 9 + "score": 0.32365795029773287, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation_from", "metric": "chrf", - "score": 0.17652714369664665, - "sentence_nr": 9 + "score": 0.7121135616759211, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.1864036495127383, - "sentence_nr": 9 + "score": 0.3201911827891037, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.28188465375440136, - "sentence_nr": 9 + "score": 0.7182383858693244, + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.17755132725434278, - "sentence_nr": 9 + "score": 0.4536404448264584, + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.27939552769052894, - "sentence_nr": 9 + "score": 0.8020827133708689, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.1445047538382198, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.2737322242154943, - "sentence_nr": 9 + "score": 0.5134477225657772, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.10686832559533661, - "sentence_nr": 9 + "score": 0.22831386795944372, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.20609270360853799, - "sentence_nr": 9 + "score": 0.6930977635889574, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.1957899789117337, - "sentence_nr": 9 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.32253417440653254, - "sentence_nr": 9 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.137248043368656, - "sentence_nr": 9 + "score": 0.22012248449054683, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.22329074990170197, - "sentence_nr": 9 + "score": 0.6169942315547352, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.14969363386531168, - "sentence_nr": 9 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.27820986095394096, - "sentence_nr": 9 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.24493390281390082, - "sentence_nr": 9 + "score": 0.18818468056723414, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.48113625107113883, - "sentence_nr": 9 + "score": 0.5492529967313384, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.19476681308252697, - "sentence_nr": 9 + "score": 0.18818468056723414, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.42030407727741037, - "sentence_nr": 9 + "score": 0.5492529967313384, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.33600502687041833, - "sentence_nr": 9 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.5162346121569341, - "sentence_nr": 9 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.10336049249219333, - "sentence_nr": 9 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.3088863284587533, - "sentence_nr": 9 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.16561315331829, - "sentence_nr": 9 + "score": 0.20617350508583818, + "sentence_nr": 6 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.4491486313807806, - "sentence_nr": 9 + "score": 0.5529730302091665, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.27190910124573536, - "sentence_nr": 9 + "score": 0.21644311639014951, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.5173567851798608, - "sentence_nr": 9 + "score": 0.5920345924272161, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.09851325694216304, - "sentence_nr": 9 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.3616605984753398, - "sentence_nr": 9 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.2985215837565239, - "sentence_nr": 9 + "score": 0.25755472674357427, + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.5357534811195178, - "sentence_nr": 9 + "score": 0.7079787462750899, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.34854547753540127, - "sentence_nr": 9 + "score": 0.25755472674357427, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.5565027260893921, - "sentence_nr": 9 + "score": 0.7217376192850543, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.11215313654295675, - "sentence_nr": 9 + "score": 0.25755472674357427, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.36001328873605765, - "sentence_nr": 9 + "score": 0.7217376192850543, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.19593487880196195, - "sentence_nr": 9 + "score": 0.22012248449054683, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.4136765523891332, - "sentence_nr": 9 + "score": 0.6174396094425288, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.08839512340686698, - "sentence_nr": 9 + "score": 0.3142765374520343, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "bleu", - "score": 0.21177781620127928, - "sentence_nr": 9 + "score": 0.3060368950930089, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "mr", + "task": "translation_from", "metric": "chrf", - "score": 0.4460741740050364, - "sentence_nr": 9 + "score": 0.7004749900624669, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.3459789902390003, - "sentence_nr": 9 + "score": 0.48994561421713123, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.5620330456296532, - "sentence_nr": 9 + "score": 0.8084123599808738, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.37825713491091884, - "sentence_nr": 9 + "score": 0.4545091839935173, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.5584414289480568, - "sentence_nr": 9 + "score": 0.7166050399790445, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.3695375029926146, - "sentence_nr": 9 + "score": 0.2919394073770869, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.556875129479421, - "sentence_nr": 9 + "score": 0.6265777781732258, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.47923168144435746, - "sentence_nr": 9 + "score": 0.4345673759957651, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.6534660189132082, - "sentence_nr": 9 + "score": 0.703388118507387, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.35818640176176625, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.723627810424739, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.3118437333980883, - "sentence_nr": 9 + "score": 0.35818640176176625, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.49789195197291464, - "sentence_nr": 9 + "score": 0.723627810424739, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.429512074830509, - "sentence_nr": 9 + "score": 0.35818640176176625, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.6066779955199886, - "sentence_nr": 9 + "score": 0.723627810424739, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.44036190349192267, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.44036190349192267, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.4257605183794877, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.6026940597371309, - "sentence_nr": 9 + "score": 0.43485418354574973, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.24609170069111483, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.22752748300913606, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.46867110147752883, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.39653893552930464, - "sentence_nr": 9 + "score": 0.35818640176176625, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.5591087327390277, - "sentence_nr": 9 + "score": 0.723627810424739, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.40518022025671885, - "sentence_nr": 9 + "score": 0.35818640176176625, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.5707666164180741, - "sentence_nr": 9 + "score": 0.723627810424739, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.27460305577138294, - "sentence_nr": 9 + "score": 0.35818640176176625, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.5251472574042976, - "sentence_nr": 9 + "score": 0.723627810424739, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.4257605183794877, - "sentence_nr": 9 + "score": 0.35818640176176625, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.6072620760408021, - "sentence_nr": 9 + "score": 0.723627810424739, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.2917591430729611, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.64776154598388, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.17793925745339384, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.5258943316166202, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.24287220388451114, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.4829182994799567, - "sentence_nr": 9 + "score": 0.24939081998882368, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "bleu", - "score": 0.27309322054464596, - "sentence_nr": 9 + "score": 0.35818640176176625, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "translation_to", + "bcp_47": "jv", + "task": "translation_from", "metric": "chrf", - "score": 0.5162255850430824, - "sentence_nr": 9 + "score": 0.723627810424739, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.1845747513433909, - "sentence_nr": 9 + "score": 0.4812700337596407, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.44379971518505973, - "sentence_nr": 9 + "score": 0.7668482135865776, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.18212463619188357, - "sentence_nr": 9 + "score": 0.3370129264673147, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.469592540371137, - "sentence_nr": 9 + "score": 0.7096874943799061, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.20734616999079872, - "sentence_nr": 9 + "score": 0.31671615012203974, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.5036833880605232, - "sentence_nr": 9 + "score": 0.6782734900436637, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.2817686971402115, - "sentence_nr": 9 + "score": 0.31671615012203974, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.5170853673805775, - "sentence_nr": 9 + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.6782734900436637, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.09950615774798431, - "sentence_nr": 9 + "score": 0.31671615012203974, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.4145753205477691, - "sentence_nr": 9 + "score": 0.6782734900436637, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.09596136927307748, - "sentence_nr": 9 + "score": 0.31671615012203974, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.40849147213099996, - "sentence_nr": 9 + "score": 0.6782734900436637, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.216062485604554, - "sentence_nr": 9 + "score": 0.31671615012203974, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.4780977009860418, - "sentence_nr": 9 + "score": 0.6782734900436637, + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.22546521673609302, - "sentence_nr": 9 + "score": 0.31671615012203974, + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.5396972089286257, - "sentence_nr": 9 + "score": 0.6782734900436637, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.23502778906204924, - "sentence_nr": 9 + "score": 0.31671615012203974, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.5533644883224328, - "sentence_nr": 9 + "score": 0.6782734900436637, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.23288432092807593, - "sentence_nr": 9 + "score": 0.31671615012203974, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.4777685664632553, - "sentence_nr": 9 + "score": 0.6782734900436637, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.3288562544630599, - "sentence_nr": 9 + "score": 0.31671615012203974, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.5875530351959068, - "sentence_nr": 9 + "score": 0.6782734900436637, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.024449792954766115, - "sentence_nr": 9 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.25748397762867226, - "sentence_nr": 9 + "score": 0.31671615012203974, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.49702079004924316, - "sentence_nr": 9 + "score": 0.6782734900436637, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.2517176762753373, - "sentence_nr": 9 + "score": 0.31671615012203974, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.45137344500317134, - "sentence_nr": 9 + "score": 0.6782734900436637, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.3128384316903283, - "sentence_nr": 9 + "score": 0.31671615012203974, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.48016279207050283, - "sentence_nr": 9 + "score": 0.6782734900436637, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.3508847643803501, - "sentence_nr": 9 + "score": 0.31671615012203974, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.529198044527105, - "sentence_nr": 9 + "score": 0.6782734900436637, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.2674628639054191, - "sentence_nr": 9 + "score": 0.31671615012203974, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.4784292149775752, - "sentence_nr": 9 + "score": 0.6782734900436637, + "sentence_nr": 6 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.18555265687599404, - "sentence_nr": 9 + "score": 0.31771674795486515, + "sentence_nr": 6 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.3943451217336116, - "sentence_nr": 9 + "score": 0.6550628376568252, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation_from", "metric": "bleu", - "score": 0.22972631482860506, - "sentence_nr": 9 + "score": 0.31671615012203974, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation_from", "metric": "chrf", - "score": 0.436102988762466, - "sentence_nr": 9 + "score": 0.6782734900436637, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.1587543502252646, - "sentence_nr": 9 + "score": 0.19910401453355991, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.4114443619817223, - "sentence_nr": 9 + "score": 0.5815343547138478, + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.35541324629951093, - "sentence_nr": 9 + "score": 0.1624355752882384, + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.49416627591115303, - "sentence_nr": 9 + "score": 0.4952968469712617, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.267457541157426, - "sentence_nr": 9 + "score": 0.45307778036928104, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.4673846703066711, - "sentence_nr": 9 + "score": 0.6384504056254413, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 + "score": 0.4201902477742268, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.21808070471467408, - "sentence_nr": 9 + "score": 0.23185078121230157, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.3966492622645894, - "sentence_nr": 9 + "score": 0.5831825813678972, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 9 + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.26116607863611285, - "sentence_nr": 9 + "score": 0.40176661207669695, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.09196922936475649, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.35950194744727476, - "sentence_nr": 9 + "score": 0.12648351910430983, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.43200638115383627, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.6892273787708799, - "sentence_nr": 9 + "score": 0.3806304275656041, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.341195158470265, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.6539473951166187, - "sentence_nr": 9 + "score": 0.3806304275656041, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.47372467075851415, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.734800469477975, - "sentence_nr": 9 + "score": 0.4741401979744739, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.5582838437615822, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.7625459507115938, - "sentence_nr": 9 + "score": 0.4288827686761902, + "sentence_nr": 6 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.38962400400495395, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 6 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.6039376140178496, - "sentence_nr": 9 + "score": 0.4655976516174543, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.45026965676007474, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.6668256174353906, - "sentence_nr": 9 + "score": 0.5687448979662939, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.310668922100995, - "sentence_nr": 9 + "score": 0.2908402945446888, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.58212864821275, - "sentence_nr": 9 + "score": 0.5817930307429677, + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.729605098531811, - "sentence_nr": 9 + "score": 0.3800213082631731, + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.8833183865941937, - "sentence_nr": 9 + "score": 0.5676463425230758, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.5585674160229753, - "sentence_nr": 9 + "score": 0.27587476896182844, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.791505922278621, - "sentence_nr": 9 + "score": 0.5801799655962208, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.5592126620745396, - "sentence_nr": 9 + "score": 0.2111187176080899, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.6961094171330644, - "sentence_nr": 9 + "score": 0.535975670216605, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.29504037076486817, - "sentence_nr": 9 + "score": 0.21001906325619232, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "translation_to", + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.6837809127705262, - "sentence_nr": 9 + "score": 0.5151695082194717, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.44680913024590146, - "sentence_nr": 9 + "score": 0.2543881726648529, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "bleu", - "score": 0.648473971864945, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ta", + "task": "translation_from", "metric": "chrf", - "score": 0.8247818102038394, - "sentence_nr": 9 + "score": 0.3099293756712212, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.37489047453628294, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.40475700826319555, - "sentence_nr": 9 + "score": 0.7155230965848066, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.4831233610237384, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.4349871720911447, - "sentence_nr": 9 + "score": 0.7807505267551733, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.2697856975860103, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.47497024539412314, - "sentence_nr": 9 + "score": 0.5736298373015629, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.3766019021279213, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.3805666011451541, - "sentence_nr": 9 + "score": 0.7318674193893624, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.4831233610237384, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.7807505267551733, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.2604066818943325, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.38511373700997104, - "sentence_nr": 9 + "score": 0.6461214650335204, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.3766019021279213, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.4710260495003035, - "sentence_nr": 9 + "score": 0.7318674193893624, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.36800882629132287, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.7348407723095736, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.36800882629132287, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.7348407723095736, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.20390514683548702, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.4677317890018283, - "sentence_nr": 9 + "score": 0.6747066998707847, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.48994561421713123, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.8131513745396886, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.3407563025626974, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.7443887915363598, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.32282138800401855, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.3787500122126683, - "sentence_nr": 9 + "score": 0.6700420356552951, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.48994561421713123, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.354353831625583, - "sentence_nr": 9 + "score": 0.8131513745396886, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 9 + "score": 0.4909136024426773, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 + "score": 0.8295116386418164, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.10235881838919027, - "sentence_nr": 9 + "score": 0.3766019021279213, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.42794399630326124, - "sentence_nr": 9 + "score": 0.7318674193893624, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.4831233610237384, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.7807505267551733, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.19268479640608693, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.5168612893142901, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 9 + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.09649622940465846, - "sentence_nr": 9 + "score": 0.5084550790849273, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.1832567180568652, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "fa", + "task": "translation_from", "metric": "chrf", - "score": 0.29275810079464665, - "sentence_nr": 9 + "score": 0.5727346150299959, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.5244380103905697, - "sentence_nr": 9 + "score": 0.4284945090100314, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.6914581279144536, - "sentence_nr": 9 + "score": 0.7246227738353674, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.4472834999328078, - "sentence_nr": 9 + "score": 0.5595205105615875, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.6457130269652316, - "sentence_nr": 9 + "score": 0.8322210048001876, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.403469748891042, - "sentence_nr": 9 + "score": 0.4284945090100314, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.5836273992135024, - "sentence_nr": 9 + "score": 0.7246227738353674, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.4521209970489246, - "sentence_nr": 9 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.6307076431103672, - "sentence_nr": 9 + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.431319746325093, - "sentence_nr": 9 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.6367129659739652, - "sentence_nr": 9 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.310186302993101, - "sentence_nr": 9 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.5434540129901786, - "sentence_nr": 9 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.5201565256464291, - "sentence_nr": 9 + "score": 0.31671615012203974, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.6663170490872967, - "sentence_nr": 9 + "score": 0.6782734900436637, + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.5471998982127312, - "sentence_nr": 9 + "score": 0.31671615012203974, + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", - "task": "translation_to", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.668797436013741, - "sentence_nr": 9 + "score": 0.6782734900436637, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.5950978682255068, - "sentence_nr": 9 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.7209575532500453, - "sentence_nr": 9 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.32679491753274487, - "sentence_nr": 9 + "score": 0.4284945090100314, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "translation_to", + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.5592874366443522, - "sentence_nr": 9 + "score": 0.7199991365237522, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.36634140441362645, - "sentence_nr": 9 + "score": 0.2821801681960571, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "translation_to", + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.5925773491774018, - "sentence_nr": 9 + "score": 0.6827049750669753, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.2672991324984635, - "sentence_nr": 9 + "score": 0.4284945090100314, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.5378982230702222, - "sentence_nr": 9 + "score": 0.7246227738353674, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.4880149105083363, - "sentence_nr": 9 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.7177464929662396, - "sentence_nr": 9 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.504154287515855, - "sentence_nr": 9 + "score": 0.28592291256793106, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.6074467585243234, - "sentence_nr": 9 + "score": 0.6102727682426059, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.18771816026273827, - "sentence_nr": 9 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation_from", "metric": "chrf", - "score": 0.37594160796244835, - "sentence_nr": 9 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "translation_from", "metric": "bleu", - "score": 0.2828480467326008, - "sentence_nr": 9 + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.3171094709345114, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.7045234516083255, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.3942058093215873, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.6697898834930974, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.3142665434344143, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.6466526067220029, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.42984824697674956, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.4330386622117487, - "sentence_nr": 9 + "score": 0.6934309279690296, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.26314173809974317, - "sentence_nr": 9 + "score": 0.2453392175275486, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.41943156806161835, - "sentence_nr": 9 + "score": 0.6569130291153491, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.19464521962073492, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.5760406199498378, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.13950796967929138, - "sentence_nr": 9 + "score": 0.2046592065585361, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.3461520644408903, - "sentence_nr": 9 + "score": 0.5835189337715896, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.17207258849758605, - "sentence_nr": 9 + "score": 0.3060368950930089, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.3052503498954155, - "sentence_nr": 9 + "score": 0.6834837188844622, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.29170205300854224, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.6498499527552988, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.4797543511401896, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.7550938397535033, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.1377448219106278, - "sentence_nr": 9 + "score": 0.20313747122261766, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.35651447515721807, - "sentence_nr": 9 + "score": 0.5506087730896332, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.20313747122261766, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.5506087730896332, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.2046592065585361, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.5835189337715896, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.3393109592089468, - "sentence_nr": 9 + "score": 0.4896430866960958, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.5094767086148101, - "sentence_nr": 9 + "score": 0.7815961723922495, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.3653634812607, - "sentence_nr": 9 + "score": 0.2046592065585361, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.5239315135469935, - "sentence_nr": 9 + "score": 0.5835189337715896, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0, - "sentence_nr": 9 + "score": 0.3763693611344683, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 + "score": 0.6360504215730572, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.30019266689543556, - "sentence_nr": 9 + "score": 0.19464521962073492, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.520168227007293, - "sentence_nr": 9 + "score": 0.5760406199498378, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.19464521962073492, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.5760406199498378, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.29170205300854224, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.6498499527552988, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.14728212724124629, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.022925118914031796, - "sentence_nr": 9 + "score": 0.485741585706456, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "bleu", - "score": 0.10793991565723801, - "sentence_nr": 9 + "score": 0.44787223195695314, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "translation_to", + "bcp_47": "yue", + "task": "translation_from", "metric": "chrf", - "score": 0.3418311350990793, - "sentence_nr": 9 + "score": 0.7968980206907678, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.33684416564135483, - "sentence_nr": 9 + "score": 0.3610544299180199, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.6173496967095872, - "sentence_nr": 9 + "score": 0.49125115898082056, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.21054588509072256, - "sentence_nr": 9 + "score": 0.3610544299180199, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.5020237474009813, - "sentence_nr": 9 + "score": 0.49125115898082056, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.21083122707088572, - "sentence_nr": 9 + "score": 0.3733543476417276, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.49504056885829906, - "sentence_nr": 9 + "score": 0.538395940979961, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.22593581165006588, - "sentence_nr": 9 + "score": 0.3610544299180199, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.47811810874873667, - "sentence_nr": 9 + "score": 0.49125115898082056, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.1973212456326944, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.4151043049244464, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.23288432092807593, - "sentence_nr": 9 + "score": 0.1973212456326944, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.571224820704715, - "sentence_nr": 9 + "score": 0.4151043049244464, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.377949467106015, - "sentence_nr": 9 + "score": 0.1973212456326944, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.6288808546806746, - "sentence_nr": 9 + "score": 0.4151043049244464, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.3733543476417276, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.538395940979961, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.3733543476417276, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.538395940979961, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.3335574881036169, - "sentence_nr": 9 + "score": 0.20763578034718042, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.6172751686457948, - "sentence_nr": 9 + "score": 0.46035934390642647, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.3610544299180199, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.49125115898082056, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.3733543476417276, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.538395940979961, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.39359852091634406, - "sentence_nr": 9 + "score": 0.3610544299180199, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.6265459059804013, - "sentence_nr": 9 + "score": 0.49125115898082056, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.47194552522795125, - "sentence_nr": 9 + "score": 0.1973212456326944, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "translation_to", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.6449793729895639, - "sentence_nr": 9 + "score": 0.4151043049244464, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.027321912102901323, - "sentence_nr": 9 + "score": 0.3610544299180199, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.24481610134231654, - "sentence_nr": 9 + "score": 0.49125115898082056, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.10436839690765871, - "sentence_nr": 9 + "score": 0.3610544299180199, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.4723697955467262, - "sentence_nr": 9 + "score": 0.49125115898082056, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.1973212456326944, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.4151043049244464, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.1973212456326944, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.4151043049244464, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.10480708799994727, - "sentence_nr": 9 + "score": 0.33891487511850005, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.4347737895846244, - "sentence_nr": 9 + "score": 0.5365882254723207, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "bleu", - "score": 0.3712375815038101, - "sentence_nr": 9 + "score": 0.3733543476417276, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "translation_to", + "bcp_47": "ko", + "task": "translation_from", "metric": "chrf", - "score": 0.6205370332736169, - "sentence_nr": 9 + "score": 0.538395940979961, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.11283678603002038, - "sentence_nr": 9 + "score": 0.4284945090100314, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.5027560731174364, - "sentence_nr": 9 + "score": 0.7164026439677106, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.29432909534200313, - "sentence_nr": 9 + "score": 0.4284945090100314, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.5637465580755235, - "sentence_nr": 9 + "score": 0.7164026439677106, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.20665163138245418, - "sentence_nr": 9 + "score": 0.4284945090100314, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.5424961081814776, - "sentence_nr": 9 + "score": 0.7164026439677106, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.4284945090100314, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.3720123244240524, - "sentence_nr": 9 + "score": 0.7164026439677106, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.1663603558532716, - "sentence_nr": 9 + "score": 0.23198210427894825, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.5225044219427867, - "sentence_nr": 9 + "score": 0.630711601223299, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.28341626687166926, - "sentence_nr": 9 + "score": 0.23198210427894825, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.5869314876429665, - "sentence_nr": 9 + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.4284945090100314, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.7164026439677106, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.23198210427894825, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.34496242859007625, - "sentence_nr": 9 + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.4284945090100314, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.7164026439677106, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.33464494273746426, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.6344206060206898, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.429294349316905, - "sentence_nr": 9 + "score": 0.23198210427894825, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.6516072032987875, - "sentence_nr": 9 + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.19771661626342427, - "sentence_nr": 9 + "score": 0.23198210427894825, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "translation_to", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.5115994004182517, - "sentence_nr": 9 + "score": 0.630711601223299, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.23198210427894825, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation_from", "metric": "chrf", - "score": 0.38973727109769035, - "sentence_nr": 9 + "score": 0.630711601223299, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "translation_to", + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.23972125922151485, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.5848344753614038, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.8578928092681435, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.9422733087334002, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.8578928092681435, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.9422733087334002, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.6734648419604768, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.7694606959147566, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.5397323593778651, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.7889494278008897, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.4803501444747088, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.6901644027400852, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.8578928092681435, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.9422733087334002, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.4929664394953523, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.6587225864765196, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.5383680940297331, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.786096406361039, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.5383680940297331, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.786096406361039, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.442444266172892, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.6752890127907986, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.23487811400114963, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.35937816565888026, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.5383680940297331, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.797323390576564, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.4896430866960958, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.6750223515189266, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.42818224355402373, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.42195777059677314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.667901678840575, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.2453392175275486, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.5365332655663203, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.2910624919304027, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.525586298564604, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.32263864160302524, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.5322085776775811, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.30041915229862387, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.5110381669871915, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.28571962561926445, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.5009118094880767, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.28571962561926445, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.5009118094880767, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.23972125922151485, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.48707827505552054, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.30041915229862387, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.5110381669871915, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.23972125922151485, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.5111162100323225, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.28571962561926445, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.5009118094880767, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.30041915229862387, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.5110381669871915, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.30041915229862387, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.5110381669871915, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.30041915229862387, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.5110381669871915, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.28571962561926445, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.5009118094880767, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.23972125922151485, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.4848369352570916, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.3677323079275383, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.22738612304909625, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.47454858661827737, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.38687573986922297, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.6514359547109982, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.42105372680687736, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.7001171094008295, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.19910401453355991, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.5338904589112099, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.24233572351352062, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.5675102323575353, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.44327096902711544, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.4245201692926341, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.4803501444747088, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.7417101158248365, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.4614951111675217, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.4614951111675217, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.4848137281002213, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.38785611216800814, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.6213964982068823, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.22608614921612638, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.5233340279272971, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.5383680940297331, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.786096406361039, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.3416198047295219, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.6421557564106395, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.38785611216800814, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.6213964982068823, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.38785611216800814, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.6213964982068823, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.38785611216800814, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.6213964982068823, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.47134770123856895, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.33425592140853283, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.38513414673376833, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.7005713730032203, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.3865584077322271, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.7076640192892537, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.4881010344921759, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.8110307349404526, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.3865584077322271, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.7076640192892537, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.2767906930665974, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.6946453530067933, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.3685289119518547, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.6989312471477999, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.3865584077322271, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.7076640192892537, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.3865584077322271, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.7076640192892537, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.32393211943598493, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.6439540125824591, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.32393211943598493, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.6439540125824591, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.3865584077322271, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.7076640192892537, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.3865584077322271, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.7076640192892537, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.3865584077322271, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.7076640192892537, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.5183282721440023, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.824367835388174, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.3865584077322271, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.7076640192892537, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.3865584077322271, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.7076640192892537, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.3865584077322271, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.7076640192892537, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.3865584077322271, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.7076640192892537, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.2644894679721535, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.6377622807212552, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.19547215688069816, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.5978847581113598, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.5183282721440023, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.824367835388174, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.42984824697674956, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.7395804946242599, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.527528099078667, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.4094709585736592, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.47384807927636907, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.24456656109396324, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.6506028604680089, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.21576146358278564, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.5945881910966203, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.32365795029773287, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.7121135616759211, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.375322162926944, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.375322162926944, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.19268479640608693, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.5479565964904024, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.24456656109396324, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.6506028604680089, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3683270402125527, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.24456656109396324, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.6506028604680089, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.24456656109396324, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.6506028604680089, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.24456656109396324, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.6532234058412462, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.24456656109396324, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.6532234058412462, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.19268479640608693, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.5505445890664655, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3765285487977598, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3567823943323416, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.21576146358278564, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.5945881910966203, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.42195777059677314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.7076271819674439, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.42195777059677314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.7128603669502883, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.3584668928097086, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.634863098567942, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.3584668928097086, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.634863098567942, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.23972125922151485, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.6266330371317139, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.24456656109396324, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.6506028604680089, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.23972125922151485, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.6266330371317139, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.2998634479378894, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.4724747576866367, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.31142220978542806, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.5393731664820625, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.42195777059677314, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.7128603669502883, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.42195777059677314, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.7128603669502883, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.42195777059677314, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.7128603669502883, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.42195777059677314, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.7128603669502883, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.42195777059677314, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.7128603669502883, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.42195777059677314, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.7076271819674439, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.42195777059677314, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.7076271819674439, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.42195777059677314, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.7076271819674439, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.3589120626864888, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.5425514102500383, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.19415472735264994, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.41508997974031253, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.23972125922151485, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.6266330371317139, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.42062888241722096, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.6813469636986809, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.2600960555023324, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.19898107345153532, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.5339708887761974, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.4770332228554784, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.21644311639014951, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.6255338654372382, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.4394721549501124, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.20390514683548702, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.6152907875442002, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.40305334040893576, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.40305334040893576, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.3490251488234659, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.14832342806480867, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.33219031071031885, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.16712370229561266, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.48994561421713123, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.8020827133708689, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.39545121937832856, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.6822216627082669, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.20313747122261766, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.5634928669626099, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.34822073619539046, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.5822525754254643, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.41763594575087865, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.17389434573554247, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.24233572351352062, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.5561045459288251, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.3942058093215873, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.6294033705157869, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.3942058093215873, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.6452705345581219, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.1712473044894657, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.5272789142558241, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.2111187176080899, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.5086920944994741, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.2165768464503216, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.5453742939698616, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.15929050399664219, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.5414849269145706, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.39545121937832856, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.6541357656856408, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.43281826407421803, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.4282924873829561, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.4056299814865685, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.18413533063377066, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.5210859722276246, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.2573956940045279, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.6452211051786294, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.4909101855057947, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.2620612021069097, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.5135749478743646, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.2620612021069097, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.5135749478743646, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.20028107620075963, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.3719830026822787, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.3137810587035114, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.39569555015790975, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.7071407148576546, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.334851704167788, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.6039857021253505, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.28615556452106294, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.6299182889624744, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.33464494273746426, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.5983196805551743, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.33464494273746426, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.5983196805551743, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.285110649893058, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.18961526642588783, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.334851704167788, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.6474532635641537, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.5383680940297331, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.786096406361039, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.4787974949414673, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.713332477096005, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.32952451615212436, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.4924875778629721, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.4021713045548922, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.4199894418900964, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.7020733406320506, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.2256540129692241, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.4590213069261372, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.1974694070034893, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.1754193437062364, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.1785676020318168, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.1842384650009126, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.18458233680118352, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.1774601133769828, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.5383680940297331, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.786096406361039, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.4787974949414673, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.713332477096005, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.20313747122261766, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.4583712036944982, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.39936742298908956, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.3981749822565199, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.2404315522172745, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.4283144598181996, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.1641956652179752, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.2615311775021803, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.5508394512324739, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.17905278399134197, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.37257295447029826, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.15521606028436608, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.37645329404497957, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.12620429887108936, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.35580703793872603, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.12872220631084524, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.33602633953270183, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.14465783975603244, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.30302832149927306, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.10316589800033629, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.3117209570799365, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.1582866049832572, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.34487142413575794, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.1914846465507395, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.21374300348268538, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.03037224815656603, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.0692157910411486, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.2170093014574072, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.042575418285137674, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.05173688961049459, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.306354798881405, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.5275640293284548, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.2734283774929853, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.5252214120598302, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.10203846572325131, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.33381153680096753, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.1685643537060726, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.36926449644166065, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.28110751553747226, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.549858109237563, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.04563693212632035, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.2616350227334417, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.014935758919429663, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.08106107745254391, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.044304867337633724, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.20806974344498103, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.1418524086391329, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.38295770773758747, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.15268019045355535, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.41028757620299977, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.030860166165309233, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.1100250143829584, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.21255327712152144, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.43272151570555034, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.17560861603765618, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.4284862592126998, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.09632940954518097, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.3503787449131298, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.08860973467526746, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.3178004360288637, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.034863432218006, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.10898936628924337, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.04181105630729634, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.12631062389493458, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.05918530850500025, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.04695194140007918, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.27375248375511296, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.06609667473412645, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.26197209338359717, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.22815217447604735, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.48973384754323573, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.1507980395794452, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.4306039128585424, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.1438459189500836, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.30693371625402605, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.2288990188897003, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.48933901443699584, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.21496499417848794, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.4621452070963622, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.04294724676074863, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.26827434375490194, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.009624974244068071, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.07318255686027669, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.043420474648595074, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.2884095690753619, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.17382347640129553, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.4061580777885601, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.13868172938464635, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3094469764260441, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.10361854845420869, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.32774802711076473, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.15186969315425305, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3458120002305796, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.15514179348953702, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.37626364881514784, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.05109780779294313, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.27453721503960304, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.19074380068002203, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.40566585096277824, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.011099726775956284, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.011099726775956284, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.20031726728306523, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.04846885443759431, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.2058585556849374, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.2518167302716443, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.28020858443704566, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.4608753441128863, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.18668631382554257, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.4241639400875279, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.20485833586704885, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.468735805943922, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.192481383169461, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3799051443349615, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.2191271005227237, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.4602679885982292, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.23460587033646188, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.01252735726099625, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.273148644463442, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.09431297723472011, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.3616856339096348, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.11091252683001185, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.26607634610445896, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.189717083187238, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.10266747466754884, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.3364703638684802, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.15325054291996534, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.4125406716806515, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.14974627745213473, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.3288440080133683, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.22381487678101888, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.5249370100068887, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.2284908570852028, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.2284908570852028, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.28912109037408523, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.05694565324984518, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.29870977978564917, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.2965847839642647, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.22274170049761707, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.4532692581443855, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.22670340807570744, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.5112287500509207, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.2144604484498437, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.48894052224175993, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.21001173689943997, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.423493931076046, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.1819297584528903, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.45996792687703736, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.024019278322400153, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.2257355657343791, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.006232910970143225, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.06317168666869727, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.06938388878349923, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.3541078046399395, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.14557808399334188, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.36598346755702993, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.18154954789336694, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.4557483776072868, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.06897533888461813, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.2776666563000344, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.1665765483402476, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.4017968725013381, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.10522014436415743, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.32544051539581526, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.13122945980689196, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.3219448107305951, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.10666682719585797, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.33462901494141756, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.026001137235511722, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.035471813848611235, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.21230556005755616, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.04151505758906764, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.029020401919028414, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.14829113372489366, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.10367752489302245, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.27027811810666413, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.257476399230248, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.4616305811028715, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.21429469817014857, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.48426124503105794, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.12189363728567917, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.37595660827287636, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.18926971577178767, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.4931453714148122, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.12938967099571722, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.4720138889028141, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.02181368760128402, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.17151506368480512, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.004663531624960091, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.07262533604330305, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.037401300306846526, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.27395881217705964, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.16780109158842918, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.3968694014697679, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.1381751568911733, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.3121557499162649, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.1579497466001673, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.5092928545844059, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.16286876096900815, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.3422914837190449, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.24894213883371918, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.40837997135938553, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.0985948810698831, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.2593923911428194, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.20198948917565754, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.34858221035657466, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.040279344208305194, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.23810512632594893, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.06888992790640074, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.2874483621307283, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.10900559073774943, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.2863243514773182, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.06584744332862066, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.3081739316918366, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.28562106983744195, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.4749518446683913, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.2500514006614894, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.4683806074404792, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.1631196072688366, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.3502730667074754, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.21286836557101563, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.45055232014427626, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.22138385440094224, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.4871238379919067, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.021395721635824105, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.19029771810891044, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.031126201157905466, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.015970144454664378, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.06929847827527827, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.30185194035792856, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.11697642623186386, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.37117753637984835, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.20065115069964384, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4084885616013531, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.17621963873521423, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.09916146090364127, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3121110160693956, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.15634566502258415, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.41100137155785554, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.05658523178050362, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.27543168466680934, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.22669486951066523, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4484451941575473, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.08054298732230164, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.21478595402015604, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.011560595536104562, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.15272161850945196, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3256028391793645, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.25884168663817486, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.28619616266290565, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.509863149159976, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.2243415229038419, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.46277723348474986, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.07368089078790738, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.41452613113710224, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.17892846390928677, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.47088195615067674, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.17121897852515788, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.46387669666892856, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.058474735537506775, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.302418496404915, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.012370537823050053, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.06660321132654005, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.09831093939330879, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.33203866499974327, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.13805615693046389, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.40787998733941394, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.12291219097556666, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3448002180666873, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.17643078314788999, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.40757584786696294, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.1989414239237112, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3791567776918788, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.19473365049885608, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.41698501435299, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.111871161288306, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.30886933073849715, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.09916009482330297, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3032928217006101, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.22001196582907748, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.05095758556418664, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.22156850902870967, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.09453698369211004, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.004225285108890082, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.0861677526401815, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.2894688065678073, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.266802610525441, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.4519949729282707, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.17670061747630258, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.392453103603526, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.1995980198896431, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.4244503391142409, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.18216362398065106, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.36524832602306334, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.16619218719171422, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.36839901474618886, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.11322415613820716, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.08319287955437346, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.08383676689911676, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.2855329690010324, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.19148282873929853, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.4707949702068854, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.20608572305725564, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.4704943905570542, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.08183353655679478, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.25007633393249695, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.0981642545874085, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.31793222329793575, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.16439209425428677, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.4076099284398328, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.008104773738922768, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.07651300648420487, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.09478705591775652, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.33293232395887284, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.02025073422738642, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.18351062254269088, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.03524547085277134, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.15744174434548794, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.11976209355757551, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.27004759126600675, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.05835638675929789, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.26034660656430636, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.18740401341623053, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.47595080320432986, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.18529763041106434, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.4741269560545019, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.09142555538569784, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.31371707771405133, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.1490232164900303, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.43745835724045856, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.16000199901017287, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.38698876630115175, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.2727622868168819, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.005606294971348417, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.06662245090541388, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.12752236829255797, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.12157241570357182, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.4080990097991491, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.085416483900781, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.2825804066750608, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.10415298161056984, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.33452632923050557, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.21204239268527586, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3846197304420823, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.18693338154817452, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.4290809947421753, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.10793175369072293, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.29660617805108785, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.14057105892389254, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3028381427383384, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.02520941562084078, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.14480208828142038, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.02332084309165125, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.14147721432606347, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.004763623056487517, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.07485928007606017, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.0262417467440806, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.04981477508091101, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3099723997891052, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.26242764207671093, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.48831610046795776, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.2697134918431208, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.4954790401096134, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.11689600237805012, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.38258301195690664, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.19809535837880818, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.447539350421338, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.16165276884674076, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.48087204023787744, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.04376773937953893, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.24391688958872212, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.004718557257042585, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.08019304349523304, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.05614653993259943, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.26485323792360876, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.16431887969160053, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.4088971379214799, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.11452508920842025, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.3212742401272785, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.08218359452575877, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.23905391762860753, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.17673835621668263, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.3902085179927465, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.19609395632695717, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.4429882447488722, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.12351824822447698, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.3906003941586503, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.13635319583999642, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.2850432830231861, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.022084251686401823, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.022084251686401823, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.051272222858601425, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.21925629669878902, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.007327166905859771, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.09881239641706457, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.12403840452736142, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.30367861518441364, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.2730720849895809, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.4904647237477465, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.2733762611506788, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.5386988475624784, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.20362195873137665, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.4504603915919526, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.20927351091825444, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.41232284529686536, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.24513601438741284, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.5084285210448447, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.05885295453595563, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.2979167122026919, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.006488743008712295, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.056679733231823716, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.04209313835422283, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.26913406771501547, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.15184278721506198, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.4093399937921707, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.15478222669012726, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.3550584759508654, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.06244445123318812, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.28239834932587327, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.15685632649880807, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.34378295878971765, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.08266931651605855, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.31275119210218016, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.04731666745775026, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.263716605484278, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.06070088845782673, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.2584364364927186, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.2290266995057779, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.19149954206308856, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.06467646497347093, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.2374647159547877, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.05821399139564002, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.2556606247641527, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.06105309074409913, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.31300959355700714, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.21356649685679252, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.4243725761948095, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.18755123479604602, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.4299642051662849, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.05675489168243481, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.3098329822024127, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.14459834065375157, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.4652483976219767, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.16524575790277993, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.4729546491009478, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.06544042341813289, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.056417721736162135, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.14707146406788849, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.050577564370191244, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.26455598459911367, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.16758563722627876, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.4598125962895632, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.07875433150726119, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.2638954513805452, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.12334630141873701, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.3570869171580578, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.19153195331287226, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.4035796398628449, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.19790095860977636, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.46847710246157187, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.13346878790838718, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.30809134673210076, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.1054433514098504, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.2840946641780818, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.06071575852216007, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.2149299083218784, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.018778588483215123, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.13941904818883186, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.005649824351905227, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.09384599631616997, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.042575418285137674, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.05179431911506798, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.048025713860174464, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.24286102972715728, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.26356108382670784, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.47640029725892175, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.281597136729422, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.5252125986152518, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.1738582449442553, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.40161714405254456, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.16120676251405475, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.3934823211441987, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.23217334003519086, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.4674701273842459, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.06367770924275107, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.006102253115653432, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.08614490649176082, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.05052791122570277, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.25244788085139286, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.18041700926694673, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.43852448917973136, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.10734088848154077, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.33946796348247366, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.12499287263993265, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.3031531068573407, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.14318317227039934, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.356756117753337, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.195067244269841, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.44793788693902575, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.059890467169145326, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.2852233544262429, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.12768613576122964, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.3279857505284436, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.10654094597354213, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.04558952792664619, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.2502391728166904, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.05915285533036862, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.010570499657301486, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.11006734435626991, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.09486000320952537, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.35631854924113476, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.2529913062741307, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.5047314299093542, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.19151044656734614, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.4810197053736848, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.15720527174368754, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.4715103005986015, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.1853793533058344, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.47839321418703307, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.20499701307081308, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.48187091128254506, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.26559916879276785, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.21349841283886073, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.27204846616025496, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.15197436941722972, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.37271000364127155, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.17795920517030017, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.41862955401967455, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.17060644184287996, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.4054584763100862, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.17670199390439656, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.36682227371085463, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.12662807666447004, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.3421762778593668, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.11509582662256548, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.2567415288537121, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.16136987880724096, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.33626920748765377, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.09630909369886713, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.2559618634215989, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.07924178226890699, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.22062584481281217, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.05468777721214362, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.2495519218392036, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.09527579368424423, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.0691791023737071, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.33564860418725057, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.2700569573710134, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.4456029881233736, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.19319119695627468, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.4018622995178534, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.2195372587354865, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.3664303672465512, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.17524367912943578, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.3908643084796051, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.160280283995202, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.4076009931283521, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.047088289574980104, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.25195062605509155, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.018807992767181335, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.0887797545718027, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.06437840881729344, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.26576141148273813, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.20266988583156875, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.48179719155586864, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.19388048412249795, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.44361702376789247, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.051668546856871944, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.30087455074312014, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.07523788658275522, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.2771251147123664, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.14016000761839026, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.3800106321723014, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.09979289495211781, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.300593829152537, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.13230039635238258, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.3269392904147474, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.1404951243925061, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.04003396689595419, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.21079478437872567, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.11588911231177465, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.23827009172444413, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.08246064965255168, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.245661992894089, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.05599772899587482, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.318436522842419, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.3010169295163665, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.48601931182953334, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.22778648697249887, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.44620122478414426, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.14947301772770566, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.30933819017694797, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.30287590293584354, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.5604725285592409, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.24773192469144745, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.41931636733206046, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.1274094829338224, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.004712897582765101, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.06896759630799948, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.05079616735013072, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.31593754046223704, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.17337747588904887, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3602154895924569, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.1237012344369667, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.33331866832253354, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.13636398180563136, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.28987182748753165, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.11209730709870733, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.31364540519664647, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.23687866002887717, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.38411414840380576, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.03153454293495729, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.19644459003645154, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.17544176680792672, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3246583081139427, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.021430138391124044, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.021430138391124044, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.08369831431112969, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.13458070775959802, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.30027380966184836, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.10145234629077284, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3610324630960452, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.3149975098608352, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4828711196365768, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.20669234829120028, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.39548218036437177, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.1955903221252232, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.40982133788576824, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.15417968758527056, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.37529479808790556, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.13326781658444442, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3502365985083862, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.057951298400924244, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.25485985066179334, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.04130329986722028, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.041010356073949844, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.2740873282030685, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.20923298022634812, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.4391664941823773, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.15589802574348086, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.37894206802233305, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.15697021945336284, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.3714913924449907, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.10748431441036872, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.3292908634814674, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.13279543122431423, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.41564365771461, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.010711907071897552, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.09374809941236832, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.09199306870423013, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.22163854171424513, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.011817365490829785, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.08595451843343337, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.010616420495610124, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.1084362918047719, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.005478181714811864, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.10269142826924012, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.04059965052015497, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.05185187637560759, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.08938592536025085, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.2237243844850531, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.23349229612349748, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.4473828787417339, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.2565364020841773, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.450805390345959, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.12201642745653093, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.36024578318571476, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.1107058214411635, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.3732245061642063, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.15069183256176744, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.40277025180754966, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.047451030956212996, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.2751988338212752, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.039842290129399376, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.0904612219823137, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.1049798504546962, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.35536862718638546, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.13303798096767047, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3647236067340644, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.1948502778967486, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.35525815981538433, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.12370396553485627, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.36583052207842287, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.2119411486498165, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.43506560274344996, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.1998886980070848, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.480958678209464, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.07330323852439774, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3393411295283515, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.06089987261870556, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.2933161562815446, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.19842901779549207, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.2713368244937688, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.08430389952515091, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.0415140550178777, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.052809267538197285, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.2937198064881724, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.27439824630186443, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5150965593614537, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.24133620987777477, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.511932195436477, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.2024418414576267, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.356433349373201, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.26748241941426637, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.4950459974606264, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.17121897852515788, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.4511162476169617, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.09796172813331827, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.010814656004254549, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.0896887156447935, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.2679540690980116, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.1769688060281599, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.43047038034793145, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.1618333627385132, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.3458746996740858, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.17740552204949464, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.41741195239753426, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.1442495030513253, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.3320792016461631, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.1864648050279449, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.3847069310448368, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.11307485585998263, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.3059738530196213, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.14391826157279944, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.3239832814361818, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.13774913339477518, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.07000554934826074, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.2193270198435431, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.10022078146902932, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.25434273119344186, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.04870160687070819, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.2290700851514103, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.10450731677295656, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.37394939751177847, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.21973252102999322, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.41512397913463883, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.19111021356168978, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.41719140749889133, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.15991026977564765, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.3236465324693797, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.1668854639288255, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.42010321376888254, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.2384674585580613, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.4770811353427645, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.04614982002370652, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.27126718818718293, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.05135254464623785, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.10818248451466282, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.3258308765385693, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.171833798351082, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.4265037420578645, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.17393111207515277, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.39042812195808824, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.14437570687117765, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3403124594589958, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.20204254060396054, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.40631134079482684, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.09215607021469929, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3442184231768954, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0653324228887703, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.29791150536969546, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.10027955093430833, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3650503321876689, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0344468040504092, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.20160402357264132, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.23784721286689645, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.020318277383243454, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.08661842632019902, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.2474248849374247, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.045518360152495066, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.2398899448049606, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.453993856664573, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.20217554656803474, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.48992480826449836, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.12853443501376743, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3781656409614192, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.2009118875039034, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.48691094449139866, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.24551993304721983, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.4695982872144731, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.044477892435255344, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.311934411870309, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.00517631148112868, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.06740002819965461, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.2452462470568165, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.22922072303609867, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5075702211165173, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.19064689695123957, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.36954921822756504, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.19591250914526429, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4012747453552514, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.14508364614975736, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.31907461937638537, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.22110615989317936, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.40753699277004807, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.14346715239003294, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.2981447906678646, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.15998665872195003, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.35681333217176553, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.08600488468756139, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.11933855307350258, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.07534587005281039, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.04606011483185924, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.05077972802413032, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.044154894914459994, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.05077972802413032, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.2916500733546219, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5172529356293942, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.1907614927330316, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4483545139496048, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.24024632160930773, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4371716158103883, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.22508089265035264, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4850615114261917, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.15188475622796066, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4050868631852785, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.06951312155807014, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.11472913879621423, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0484266906744459, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.24913886287924247, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.11397867508647329, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.4390501380282409, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.1785851272602057, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.3800733399524004, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.10927209083864609, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.3154924313217727, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.14399622191067446, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.40927634117587713, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.08670431063422109, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.3440590449655204, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.2561794438004549, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.3141794892548087, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.1786034620524431, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.21970680195024106, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.04825170735480719, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.2469647404663632, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.018801687736883517, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.07620759150236847, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.03529712519454766, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.181042027648149, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.40879266567927514, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.13932127331731958, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.3737850640861325, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.13077065491742576, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.3664151599144951, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.0984296905675516, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.3288790320536164, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.0968035844762605, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.3901340721283281, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.2599817969196305, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.004333829482338306, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.08668716511436675, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.082669215590649, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.2792157437153376, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.18851320324917495, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.4118109845203767, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.20113943179758872, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.5054929215592371, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.12371001489967776, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.35233081465372856, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.14334269972024394, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.31813833621829557, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.17921007574834458, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.4219795466326858, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.02606447183652908, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.20219794591777904, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.4267163836239083, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.029248290291042138, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.1718027364322228, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.04766862058769161, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.2043794633423878, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.03336328330137746, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.1957772364958447, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.041963867461860996, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.0516224121615006, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.30809284179770613, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.5212036360075764, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.2465035340121816, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.45633441623452936, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.14044205071392263, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.38712210510627154, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.27182145159884086, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.526070184366635, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.23498807624250856, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.48765523434090247, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.14416088697983637, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.019129282930908934, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.07711027977872816, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.0957971819913436, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.29031027439121476, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.06638301361073934, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.33617826818768626, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.07088281524771703, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.1725752257112697, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.11414041086884202, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.35006273110713093, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.1313979824300913, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.349118229304567, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.1927225490012646, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.4132550155072071, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.14857103353932644, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.291239957190704, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.16558784557611658, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.4099467657500184, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.01786532108380904, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.13121577813207103, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.05489691210701318, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.23494793085665383, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.22432434110392951, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.07275467861454202, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.14614295662986437, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.3552926382379845, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.1949241563385421, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.3964264382707378, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.209649431441878, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.46764621241357657, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.14067214182271884, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.3683127018431368, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.261537968195518, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.4670896511609081, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.16432871779782451, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.3743841843286974, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.043671403238541344, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.25919035356327436, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.009837503470338078, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.07381634573546925, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.040393483260585776, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.266056205197059, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.23272041020266335, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.5347837552430531, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.11901413329120636, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.2908877283991857, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.07103656838719773, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.27534420196130394, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.13329630208389306, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.326901495019388, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.18044662621032556, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.4057030488136589, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.17626315276087934, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.3402055671772128, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.09337623404557584, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.255517984683644, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.041301896522444675, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.17385088745134245, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.038390108835707044, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.21229342933137452, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.08474505774044223, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.253832017325449, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.04460344454473387, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.050946055101903635, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.055710421288008224, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.23427034762899968, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.30658949507361904, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.5157122183970266, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.3193100500854345, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.5162296859807791, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.17957474071770196, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.3714895660565911, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.17878846497211381, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.4805877835621217, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.1604142113422276, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.4821009866720967, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.025349142884759417, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.19474598217933617, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.021888464288139578, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.06880194424178804, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.07403099975997424, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.25305896973942904, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.15964995175974525, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.3581305879558541, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.15593857496482408, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.3832822126692406, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.03964626561854378, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.16338514690556735, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.056046675552729554, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.18841684806509754, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.209649431441878, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.3690324235282993, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.028775739564490036, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.11889226114628741, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.27239589447707985, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.05973439570871991, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.20910579957872127, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.03133618553613538, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.1568404935147634, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.0103535778609182, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.1374348609613479, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.08696621894928246, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.2942577140950735, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.14094927983933814, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.32113486843339634, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.19139210005771193, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.39990366323430515, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.16166102253231274, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.3131370145851093, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.14834005339736556, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.3650996903674865, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.19730454275995257, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.4060233750197503, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.20908195213737235, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.42477137691772376, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.06349176246319257, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.06643196929197938, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.08968389355416555, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.26065548087048496, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.14086598242600956, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.36137008859982034, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.21107720643690867, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.43911506176829573, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.03937709136327999, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.15616761711166294, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.12066885519467463, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.2616326726997023, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.13596145670351717, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.2880980104507562, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.05508931511949558, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.2517305146500679, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.1101341452220285, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.27185902677547247, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.21712215954833947, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.2399710396553458, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.05438497632520132, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.22483031146316076, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.01465842448246531, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.07372863328844016, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.30760942657718227, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.22771052484743856, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.39452182489614496, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.18493395776181829, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.4386240113678834, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.20352428331068037, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.4163814361305384, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.152083233596389, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.307389858154115, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.23982122784780777, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.4312572299745367, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.12388166705800276, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.0019267822736030833, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.2729231212296316, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.16084008820568224, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.49453446122836875, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.13827947882974537, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.4010585707766239, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.25209019490063744, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.21271493861618962, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.48032757836964046, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.10555548503828743, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.3698372209729681, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.14160083166193357, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.42086810781344214, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.15873502699291203, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.37163016195847015, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.03225234735559956, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.18669341091479777, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.22123617047892138, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.13242778128862073, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.03706945858003643, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.10714846237317564, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.23712734125066068, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.4737847575052381, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.20923986420852636, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.49694187999493455, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.22218345206036327, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.531967559614025, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.24660313247404905, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.4750802403221604, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.2302979641212271, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.4950725740253287, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.03573716512983384, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.2636286370686069, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.008808482479470064, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.08172375369450574, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.027855314822211794, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.2743064672346355, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.1605078796467662, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3607284417441162, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.054452721416611755, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.17786113214625052, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.07394430141087438, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.308455636822965, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.1462174255670787, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.33477501662684966, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.1608187789877052, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3641324834231433, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.07353968474801274, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.32309698978167023, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.1046814649445003, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.35929100435813716, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.2108920708499109, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.08862461368125828, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.06361340947543563, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.23606133878828675, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.03496289474177452, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.08333947754529797, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.2642890052611854, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.2009233547607218, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.4134068926197593, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.15677826168589556, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.377185194614394, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.055923960513901805, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.19457014898752104, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.23365039523187425, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.42556791226379487, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.21927506735329194, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.4315395376333666, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.1373600644267889, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.006313131313131313, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3270101704079343, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.25513503948815797, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.5313546358608554, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.13788203160207568, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.3107655646435926, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.1709984622318412, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.15045845033645844, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.35967967969652714, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.15055407617907607, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.2925093305055971, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.154484521805181, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.3359460432439711, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.12832094336767122, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.3205905925059277, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.12963772036574467, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.06431520707014032, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.24869345184740724, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.05137179048809052, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.2179007767607974, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.048398308556049596, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.16446970798512367, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.1688821792911141, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.1556988827286413, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.37169558531876135, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.2140902772642263, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.42684341080390703, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.17526461622814685, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.3887795637263755, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.20843200459956857, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.42187376156085354, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.22441108116111003, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.4354495412786265, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.09250654988890523, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.060410667564482795, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.04978921592425999, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.2697504713353615, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.1634286453435278, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.3969874268891194, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.19418939219609221, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.3901489832573322, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.048304077864023846, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.23970159686687842, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.20653927241974365, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.3737641013916679, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.1930726574453761, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.36015092354060324, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.13575658061231063, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.28640162384542756, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.11215442765734894, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.31037546676327293, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.10988528279333451, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.10471517551249318, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.08226969152601427, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.15696885690938372, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.05196767194788297, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.21653643432750053, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.20927351091825444, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.3903841356584215, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.20345556664614994, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.3922452261221421, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.1872830229239533, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.3677267744209934, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.13963559400140405, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.358561623108295, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.20899938247825522, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.41457588389231614, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.04191056758684487, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.2747808392885938, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.07846319110079712, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.11613581491070282, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.2960371403891785, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.1945423193070673, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.4706222268986097, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.2080824447945289, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.4427653693519822, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.096873931674983, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.357921862131455, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.11119610005454576, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.30792475044662354, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.1321058295592534, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.4148432475461046, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.08720698093571426, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.3394300386679635, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.10180741374280794, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.35728563956947634, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.06194295956144168, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.2317557084334835, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.035176121652651195, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.17161490917675337, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.07130512646301328, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.0583085961396453, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.24750289066642356, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.08557431032059788, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.3075931534356484, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.26485344282248025, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.49926833940150855, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.19756811204569283, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.4789887223696734, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.10226015509299118, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.40597878858272624, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.17770686403077657, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.48140657517707824, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.16599660185655377, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.4490603860832405, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.226528380402223, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.06004443343680894, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.2456391015238017, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.04398085473438986, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.2975413849030591, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.1334077033965181, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.3538599860218621, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.12768405545127823, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.27604666315815635, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0780295493314355, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.24395345082956324, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.08696621894928246, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.1961381404046578, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.10537687828190058, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.3233296514764708, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.07541784671250895, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.24910063520652925, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.06928783103636403, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.28222804846457444, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.017901241645426468, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.19675187530586138, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.032083175467215015, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.1187504033570596, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.006322585449419441, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.06296222300910888, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.04661045606865638, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.051225703208053745, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.16491845920935155, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.2549048589985116, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.473197481249297, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.20050198179023138, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.35990851497839205, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.07176020589506472, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.298641857962305, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.12417300961365357, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.34447336960471725, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.09985998421086474, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.28957013810992877, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.08690271774962008, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.2881002270580955, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.01552617838217617, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.09140076236425068, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.09225450507547597, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.25320206287790814, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.15757286670204007, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.3806769190392542, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.05441383188454176, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.24018232621879906, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.1426986810099998, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.31786120425004616, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.20406556965912795, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.37608957302795537, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.12961716938790843, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.3033295224621767, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.044505549603083075, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.24454668865020499, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.15282474172999858, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.3332497402594901, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.06299746467448508, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.07142051780137347, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.21559762402820992, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.05462016386637322, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.1059992243305236, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.19829657978768986, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.20796615419937428, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.4053550762669065, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.03184787808073657, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.01699912141808354, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.18085489460790177, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.3621070250317998, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.17617182067763734, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.3491313268402431, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.19992549928415637, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.34245426295474757, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.17229614610286134, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.07124457091987033, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.19364253421963298, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.35262250988466515, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.598931508663349, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.7353063745802827, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.30677064886592076, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.5308555945242818, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.1327526847508867, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.37850602486495205, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.18405035438430847, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.4142901090120915, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.3381363585599571, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.11719733243527225, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.4023449434976014, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.13410301071131794, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.3942932268034351, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.11625490582216925, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.3649888937995206, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.11625490582216925, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.3649888937995206, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.24239458593560292, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.32069132319909655, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.48514659426076356, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.605169324059723, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.08852681798207009, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.3583179111355935, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.47039611627724026, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.6298835480734482, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.5920893212447781, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.6925021521158101, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.22478613858269392, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.44348101018104913, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.4026159305424288, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.5712560131047175, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.2296660762967038, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.3916051272553242, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.07771324020744104, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.28137484554214276, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.183687049781416, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.351911486970854, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.5181825846579515, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.4262221594184117, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.5886657414856064, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.286608441075188, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.4579283646292802, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.25861130592298187, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.39452644092432093, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.20379250618355427, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.41085414309816914, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.16913268830937947, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.24522672479887386, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.17328174803055044, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.3178268797869574, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.1715979984488091, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.1715979984488091, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.2990226215771518, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.4323734152924571, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.43218646131638366, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.5429193883657468, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.2334787866969297, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.44212827861876997, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.5843055590705747, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.46269559069048716, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.46872641361415845, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.10434360980785336, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.3012789660952507, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.17248715680799764, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.40043565243219187, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.422575783710386, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.5948022735911326, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.1518079711528412, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.35052882681128233, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.13835317113453516, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.16343842313572918, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.3986641525285075, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.43186481103649477, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5792139686527714, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.30890092021323623, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5553909583113487, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.30890092021323623, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5553909583113487, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.25798723088167685, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5244854229988815, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.30890092021323623, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5553909583113487, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.30890092021323623, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5676965183365866, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.3315037521841549, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.468197879470805, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.2003825042083558, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.4389344981392915, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.2003825042083558, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.4389344981392915, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.460474309246715, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.6213537794704693, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.43104530127419294, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5857650290593939, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.30829623633682235, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.44487288478084186, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.43186481103649477, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5834549494301647, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.43186481103649477, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5834549494301647, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.30890092021323623, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5676965183365866, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.43186481103649477, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5834549494301647, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.1577724877619984, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.41413383653794145, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.003172770121174655, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.43186481103649477, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5834549494301647, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.46092611919700416, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6365915338629015, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.1690979933029136, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.3751861276375209, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.3344305108778801, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.37017501464955627, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.22830327153687363, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.4341368544819458, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.23874935220945062, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.4641140866580373, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.22218130727359342, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.39929356245904674, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.14646977906722897, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.3483313798210466, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.14646977906722897, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.3620367958150382, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.3194331635465395, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.5031092445628172, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.40604669782469155, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.5425760820715552, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.22259002746653545, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.4094746713677566, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.5831913487431972, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.42171123717273484, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.588496189746877, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.334422418242443, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.47577086062579566, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.4482907809719588, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.5498272118133005, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.4180999216775964, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.5354721595275848, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.3786410842846065, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.02467424260792568, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.21902340561392236, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.43906671679239717, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.5234484809182233, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.6658297773613274, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.2840563956846642, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.5110250591004448, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.30007504691018483, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.5153810823423555, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.14074957769288798, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.3750035199199742, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.2543269102041851, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.48299470788082083, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.2305720744624697, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.4655848022427904, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.26538706048179084, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.4982627378595717, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.10951170280696113, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.2749574296951936, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.112289032173749, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.36102352464840753, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.232738415750697, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.49618971681248764, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.4569234874515144, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.6188845588287419, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.09583427356739785, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.40272844461027313, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.45479124441660884, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.5872444902217987, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.4036650481496061, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.596979347591019, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.3488611533620711, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.5550499651473632, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.31883477089875656, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.5510450101159524, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.32808594346681713, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.541693039806838, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.006356168623193365, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.038236956722392024, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.31998097041178836, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.3815250264738168, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.6516314751979607, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.19920413481788912, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.42537796926163113, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.189902924205034, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.4072184389907138, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.18710260593933364, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.44334313717706003, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.20498415630763028, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.41559166314823337, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.20498415630763028, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.41559166314823337, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.15161074985415177, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.3796830006266126, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.09327870647412652, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.3735036941607298, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.09327870647412652, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.3735036941607298, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.07757069009917116, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.3253161209971999, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.3611133026003336, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.628939204695425, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.2445648178607759, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.533167536340577, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.6607380945724757, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.26385937298217066, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.49313681898117245, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.22168992033645996, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.523689661176845, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.2722704374402053, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.596004654894533, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.2779440822726561, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.4935559299167756, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.20711956213739935, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.4163963687531719, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.01008902035184167, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.10279947040838337, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.3569840483632983, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.40003810431098236, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5899097408105687, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.20401796878756984, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.43317630453631556, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.2097387761551816, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.49663301508497226, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.27067168022307464, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5439625482235064, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.3337338878596308, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5500899219967885, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.40066361284662694, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5020331850026643, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.18679710353734788, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3876457319870774, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.1353358401115373, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.39493151971582074, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.13534889927489716, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.39100244531038747, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.2826204057042236, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5043062352893725, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.4329476703214299, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5804967339644117, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3714562234354218, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.36330554542576643, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5680915937757648, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.36330554542576643, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5680915937757648, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.29588994069727786, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5527117669081858, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.46732353406180216, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.6059276585345114, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.4348120146318404, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.6115466077533132, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.17997291109158148, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3492017591855697, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.06266083709457643, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.36565527196849945, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4882803186347697, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.38047531731529327, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.49485723102957346, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.2044887070217883, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.38471585132587544, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.2309552734743087, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.4672309378181727, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.29886658673327365, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3270607740380116, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.18485608959451436, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.21812881407613688, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3598346059855135, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.24425230015000562, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.12137360695980427, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.30236214151783924, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.1367498402979849, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3943841419148219, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.38086548136555726, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.4893838263924812, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.22454147741956867, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.3890858187033785, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.517430128986546, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.4488802814935875, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.5347582736861126, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.5676006714726635, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.6880701448812352, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.2974074484950165, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.5121581247515657, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.45479124441660884, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.588444085222193, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.20438008283049838, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.010162846529607748, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.21947959999379651, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3226457008913864, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.4489235959690452, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5934678825154104, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.2980504190448601, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5101268920225042, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.28800869328515505, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.49348678623542436, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.2429163097293302, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5044329486461447, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.3205104057171838, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5848707979780723, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.3026681380066168, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.492659495510912, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.24586918158076287, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.4658595745396681, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.3319782560727053, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.3319782560727053, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.23073085454808062, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.44142087654422146, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.43186481103649477, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5449210171662631, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.015895766317321515, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.43218646131638366, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5402913319043152, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.4488802814935875, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5896924236258139, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.4406612884550454, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5676112112992767, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.4476950425126913, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5932980209045412, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.47232136380051687, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.6260769803555924, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.11719733243527225, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.311874117016478, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.1483315516064897, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.25249051585915977, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.38558450790399557, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.46832763312452297, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.8176110134774669, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.9436043261706615, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.9880191679951993, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.0067104198717751464, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.9025232868361638, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.9169897590736298, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.9234732618882052, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.9472986863542112, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.9709835434146469, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.9951728990866464, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.9154051169199643, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.9757471794927451, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.9154051169199643, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.9757471794927451, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.4888436840107989, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.8291158208899767, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.8935248372106969, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.9404428602061264, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.3642482472579296, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.5342538783335161, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.18831933500600306, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.4318025704181776, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.4439623527529193, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.5309137918519957, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.2628849077177109, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.2375594795209403, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.4291541972877265, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.34613789243685805, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.49330323192607783, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.21039673882735752, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.3872019296036794, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.35419471368515315, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.35590473147868196, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.17879309995151985, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.2568045428196672, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.3282410731566718, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.4364543652558664, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.3715102500248764, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.43218646131638366, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.5369715393520321, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.5492014936877967, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.5685465222735583, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.6099084961389527, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.658015760514539, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.2464380578618272, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.4278399263644655, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.2699951684630893, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.4217192983941685, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.2292375926648613, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.45134534124852044, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.1879604201975219, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.11956615218925931, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.3305337714496588, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.28406136898728457, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.5649283064490618, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.21544027588567594, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.5040038440508637, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.28552127890094825, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.16448947606185552, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.23644551786043141, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.30630098078522544, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.506196410096354, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.3395693620772222, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.14494568696092858, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.2628972098240789, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.14494568696092858, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.2628972098240789, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.11823053204772466, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.24615921057796505, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.08463566423313937, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.1696081888168728, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.09184122179650878, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.20224830805167826, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.4955056754877292, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.6304896503844739, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.5038374191720147, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.6024265863535287, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.3491726680217181, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.4946434087697324, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.22916123454514536, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.4192305796685782, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.26871275285988894, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.43155016612001146, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.18067521624740268, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.03586767012087445, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.10393938326032184, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.33864898055191395, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.23843418577408987, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.4082320855803597, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.26970223719007375, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.5172978597562362, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.2372622545962587, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.49004864454711367, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.190140357671548, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.26802811732881265, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.47842918615794633, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.16776974914462364, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.39918336227233053, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.22556860731509948, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.4747086049005634, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.08950723402647723, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.33168942069142754, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.15013907175244498, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.30693889771704796, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.23647235972003527, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.41650969469918997, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.3456747067447366, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.5098918117126965, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.22311931550349762, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.4067965564160296, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.5347497635970215, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.37720172106772915, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.5281246430019316, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.42662911848025076, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.5800596652250789, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.39420326688847324, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.6371076304605184, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.31833550883682055, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.505309837398544, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.057821363839455714, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.030501743754356173, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.19352792845274666, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.3925864519770825, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.49458876622696707, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.6179893617801274, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.30630098078522544, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.5439056051092116, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.3059872016765634, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.5499457869553984, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.20076347441707354, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.49625515445592083, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.35013594820721267, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.6055612825349426, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.33626819961829335, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.5531226519754557, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.25449674462950855, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.44805409822643144, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.11912215481390381, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.27872651214887206, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.11912215481390381, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.27872651214887206, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.18665948437666813, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.4437597552815582, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.4644593897211701, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.5796009653873663, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.1927932674702769, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.5004894768464765, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.6103735933609515, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.490343068868897, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.536581249889066, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.48670274592792, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.6717971788322309, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.417372155782838, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.5043502592801646, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.4843150498645881, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.5578850047683247, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.3374197535175977, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.06680433144407034, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.2919280798407827, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.4076170046499833, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.41520313827696, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.6485212540886613, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.19850842371858787, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.43584341835040474, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.284161309400485, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.5096201523229312, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.1134451991138546, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.4186167762559285, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.17251155106930158, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.331584914092895, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.3228858965814099, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.4495802766763041, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.3312570339636223, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.45442661484375735, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.19397810500627852, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.21990266141251047, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.24088562704853508, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.3796021685415706, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.487233540201337, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.6075898035634494, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.23440876674515643, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.4317853842116786, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.5396321094501078, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.479676449968321, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.5540737713291652, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.43874832905672956, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.5882858748700781, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.3270207865532903, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.20934077434952414, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.44095319717043774, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.08298317943887776, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.25735819287934475, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.006769280526888359, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.35465713644381464, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.4911561718424494, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.37233302529431345, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.5112697990822607, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.20170335119323748, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.3541251997977811, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.20170335119323748, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.3885781003640365, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.19139378056089276, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.4390566098420477, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.3530697921185709, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.21741853044139284, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.4000689216320657, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.22744906705116497, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.11460773731685357, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.2063706369833672, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.11460773731685357, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.2063706369833672, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.20533223830207933, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.3692051451450368, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.4798870804700399, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.190947150655844, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.4946630566933846, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.6169269421105686, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.4245848004818514, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.5212978747333228, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.28392242834976933, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.5447934365522582, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.21644298243944068, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.39638009285990555, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.18043239916836057, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.28005066200532414, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.17657522592016398, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.0013316837154984055, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.16192940337079562, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.2536107727939302, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.3857901147929391, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4926358895461277, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.10508106635796587, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3182774828667731, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.1214053825777097, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.30188342402741686, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.19420534060688366, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3642322841308566, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.09829979956341456, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.2894553915787499, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.10355978106869969, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3008852578497809, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.16542259679471108, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.26975832150444645, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.08700223397019134, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.23602944119319944, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.08709145169981267, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.22981857327949462, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.307338019036143, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.3701729257793468, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4863138218190477, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.17278555874883156, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.4407883467611227, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5281354273455451, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.2945667423247454, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3820111377308234, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.34848192645275233, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5640398262082806, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.2834052290575623, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3513608621054937, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.260711748598298, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3452976012025067, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.18058381042434055, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.008037190855333259, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.09960206740894453, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.23680099011195122, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.16885023000999705, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.3897135623573608, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.2309552734743087, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.43975656978777905, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.21883928293594496, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.4140411856219547, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.14270596284245182, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.2812419410574613, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.3060434879538489, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.5282587183958382, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.2309552734743087, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.4513778135765214, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.2831988281847858, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.10556805829966062, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.10531631261880914, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.08966592262979808, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.15649677978231225, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.3589732342308518, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.4090747564330368, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.10290348648040436, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.16608616831326783, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.45739385800652627, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.6041597507917782, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.4488802814935875, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.5304323627982885, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.39448834349896583, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.5955979870734485, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.38006094126945456, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.4952333573221428, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.38078351880531736, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.5299172059880028, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.20371573027248793, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.08451648022462464, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.1919704825961155, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.3465066144910527, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.3325026294099889, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.45805631044287126, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.23530033724858213, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.46208607300298377, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.2936164862319274, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.20512476044697742, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.41887716212519804, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.2835933397340195, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.46818740249280866, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.30644785825458964, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5424888571764713, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.21877512875558908, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.46843499948065653, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.1385105944861409, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.29611042850691843, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.1385105944861409, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.29611042850691843, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.3001327916132617, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3729157997624686, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.3688020573465489, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.4388306821010023, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.18196604681759276, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.42984707831776175, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5723546698439199, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.40002626793346735, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.4958139678349833, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.364475284252601, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5303078856093503, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.40570535345933584, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5128999889612808, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.4574297382369073, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5652380318903051, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.1754880585752024, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3605361466369401, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.03788280195139692, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.18426708758406696, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.28632666167603604, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.4824794737945071, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.6479459161283603, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.37284027455688556, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.5528347504734102, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.2699951684630893, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.48283570465443887, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.37458689678657137, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.5282052116558258, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.39433348616404007, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.5908849928331047, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.1945786954763366, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.4234025088421468, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.1759696284842668, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.40989005404786566, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.20970020184997046, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.35879263764337227, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.2738643930663906, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.4248517359969422, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.18272411487051296, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.31610070189880046, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.6288402958621878, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.7096134366313045, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.17421986341870865, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.36042242462534135, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.5104779149627351, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.6193632913540729, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.4631700687380434, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.5656836354906846, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.5368572660911874, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.6191297828676535, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.524685968336986, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.6168947391572741, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.37578617319428625, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.5516093876511039, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.10653688743388451, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.3550283416692892, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.007563050858412507, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.3245592826703331, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.4961612243992949, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.4656455050518963, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.5477103600632085, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.2887308472548599, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.38846174119508314, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.13497849469044018, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.294518464464907, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.09207365845406566, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.2745662608288741, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.15725970843587567, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3365623219226185, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.112289032173749, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.29441273828339526, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.14722675403683808, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.28846790344058515, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.08940730360097038, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.26833398639382844, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0795871931195481, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.25643127123309173, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.21555651352032254, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.36232263841752405, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.5115662303301365, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.18623668314243821, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.4478710142948761, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.5290749755361062, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.4407883467611227, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.5398405650971414, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.3325507240998139, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.4074972493988868, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.35372399264817345, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.4415325921468054, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.428462689716825, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.5332275836967743, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.0061376873624774606, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.17383350569405645, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.18154339087127422, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.32445288009194484, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.485644095022506, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5923993450097689, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.15487293534817623, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.39293494862736383, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.2746536544630471, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5102501783603998, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.18154339087127422, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3913469689541562, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.30144001884264643, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.558144752753198, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.24364194228502442, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4803275525344554, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.2542828011834812, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4695941026465371, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.1449700553672855, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.34437317656055844, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.1449700553672855, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.34437317656055844, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.33281148468111865, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4458341777155295, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.3754361563303205, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5175586680476801, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.2864399964630773, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.41837414564770586, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5788593226912069, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.522758369357291, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.6580868319953985, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.3876366843353173, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.49788350946214466, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.43024400109694355, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5419412911283815, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.3968427844730261, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5690871427032166, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.14331804860578803, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3550333934998914, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.00801785790731701, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.09885362316286796, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.2922887728653336, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.3150760288937462, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.4551575101683354, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.21741853044139284, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.3535910166292039, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.22171131219306292, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.3447082086936202, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.2006070365475092, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.35578462620981843, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.2673037872588226, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.21883928293594496, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.46862000983867247, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.19659284558894802, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.3439604955527307, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.260782501330073, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.2618919102735007, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.21812881407613688, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.30905257672100556, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.3492507317994466, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.5496225090740923, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.22497847412371205, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.4532607978893934, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.6057637451772937, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.49132510881927216, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.5226059543423672, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.07757069009917116, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.349379547582856, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.30548599245666574, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.4679798467191344, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.15371120555586323, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.32771358491396213, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.23316370248970017, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.4272786682396427, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.014790009967292654, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.1352102459252932, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.32096940912916827, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.6485902560215636, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.7627201392474565, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.33626819961829335, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.5466581859383387, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.4947540498519851, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.6678485036966576, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.13753714471937797, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.47439937147594774, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.354358992866224, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.5568265126063136, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.3232764601772692, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.543101366143636, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.5046613014990851, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.6377969619576389, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.17864866390812006, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.4318546655126832, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.1810425767465413, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.40093391879422285, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.5037566723027213, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.6887059730011686, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.4334785460773371, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.6277016493301685, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.3830039704696561, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.42376138127386387, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.5866233345526405, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.5594129511963761, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.6536638088389861, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.5639943531321595, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.6640560919035684, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.6878626650756018, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.8027840472006857, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.588501779106902, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.737119524432872, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.2572812062773994, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.47123700535107493, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.009344129172007202, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.25070100037410625, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.41914475317559957, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.38713346831820944, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.5376493568188783, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.32000331642122953, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.5480591855923784, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.20926433421787555, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.4046472012888725, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.15758218479424427, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.2152072672209918, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.4179680482018134, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.19951297936500814, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.3959122807457787, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.20326213373677707, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.4211311971791892, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.12966020590511304, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.4080786465652843, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.32499291864289587, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.3026681380066168, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.4321096865782781, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.3484636843276892, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.4951272293367317, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.27498661924425016, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.31465871983706517, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.4839658797050458, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.2983588344542972, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.4730509519236799, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.4096636841225722, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.5278686169310903, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.29040471938315554, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.4617994565642428, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.23989822204325809, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.4111447629542612, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.17659857710927052, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.372208073712156, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.16484140461834484, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.21181203648983093, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.4245081340557523, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.2478022357548686, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.49124012500448727, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.21132630077912357, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.4175670766052166, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.22554684328666952, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.4891635965943922, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.1370370495710889, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.2292375926648613, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.402538938764116, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.22554684328666952, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.39468338688472576, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.12890680068769322, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.18398927158547232, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.15865104842499722, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.08013992490936363, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.1552012946702491, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.08308205615346906, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.1560179649845262, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.11534301373879766, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.4347773475385108, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.5763413809903908, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.4417179753220019, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.5530863243539881, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.30027814350488985, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.4842037100625574, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.20881729496822948, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.3442652041844856, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.13481878804182895, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.20079789489773447, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.34136152610418546, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.14470626861490504, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.2566358039205211, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.5543498698280007, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.7016802877815009, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.13108369255325433, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.3929302741911199, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.36484904083194636, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.568282580396246, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.17997291109158148, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.41721274496653427, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.2543023714256286, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.47904043667177454, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.2738379692102183, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.48783795879618724, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.14440270272056518, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.3829771215415724, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.2071380876663193, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.2071380876663193, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.16912873274521933, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.4587620543754354, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.4569234874515144, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.6188845588287419, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.22035958513840823, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.4955056754877292, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.6373668822547183, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.404949394786753, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.5523097163463507, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.17864866390812006, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.4085144222648808, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.4333488014632613, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.5965589158023283, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.5004894768464765, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.636358789468443, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.006245016945470881, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.009743563745649522, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.31745891481127153, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.13203823352287472, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.28280767234695003, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.20174045447955946, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.33729298835089516, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.1667112120846934, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.3259291852615986, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.15756751766261828, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.32067005734881104, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.16201038834403217, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.30711349527774556, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.14860706919206243, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.287605443809237, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.1667112120846934, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.30913125513655043, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.16192940337079562, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.2773561971049196, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.1718526210271666, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.29566636692043397, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.1716931385864913, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.3193195680733763, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.28606398627870955, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.3809088476029454, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.00807613785691931, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.41749084544527715, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.5326296670386377, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.3076950010419899, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.3660673946004631, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.19454290935168922, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.40563779058474464, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.20365290006260964, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.4327934810550205, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.13565907333537272, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.28348064204085005, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.008095753277702367, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.12475528808004005, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.23623824056754247, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.37420316460821246, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.6481907872475802, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.10878661088699644, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.4101850467281004, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.14636985946104297, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.42477479000228696, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.13490983794909628, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.3721166387799897, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.345393650670689, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.08917122947213559, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.3647616198018887, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.20455275179869584, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.47815735761186096, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.24433990852597712, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.20225251182568876, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.24199061099369143, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.43083710197985886, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.35617945587309824, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.5928040083540559, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.18016643009908215, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.5627168454831983, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.6692849204487608, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.5184565490048982, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.6742153914378409, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.33904091445927403, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.5149735242342894, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.3074762344614454, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.5504296088375491, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.489751626844433, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.6564932310015824, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.007811176696490374, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.009755969152501744, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.26351629170506197, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.32365707034585395, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.4584149294578286, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.13894362470892055, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.35710461878741834, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.13763666698607552, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.35163868522447556, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.24113982759255023, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.15487634919018395, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.36831488669605916, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.12408616318856693, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3652145198143255, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.2280725846401638, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.09007790030767507, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.24033534859070405, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.197162821062955, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.22171463088948726, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.3251763396242258, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.4533778048861261, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.25989013073979034, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.4315532957872565, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.4869755956428243, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.4440750605884706, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.49014891057645404, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.34420876516527255, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.5068918436131059, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.31461500509304885, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.4233526729436835, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.14023424042019694, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.24377593803204184, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.18900377082048078, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.004693571154544651, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.23316243238650552, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.26637160423927314, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.4646735366228476, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.19585063466021865, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.37259242197674974, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.20191019088262566, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.3773476047938683, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.15487634919018395, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.40723956344919005, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.09895499407509596, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.2820930896340708, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.20566600172876848, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.3332179942904409, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.18945235333331134, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.3443950901432381, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.14027395289967118, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.15420519149824552, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.10067881517323463, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.3394719609522645, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.5910744763237977, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.6719182394537323, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.18260478645231623, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.4946630566933846, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.6169269421105686, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.5795328348246752, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.6571878614059478, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.24263763794008045, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.45834072271970505, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.32018866449570155, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.4142777372959173, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.2735838424624378, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.43137371459498897, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.007847958698936742, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.008115464234465965, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.17226307014894685, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.2929617403109482, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.41749084544527715, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.5512835123193889, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.2661644792674617, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.46546633658762687, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.21203547746686083, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.45610528927175875, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.08644237346462266, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.32003978062314414, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.2816688975928506, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.47573600128712834, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.11460773731685357, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.363038960346026, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.1581734375963556, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.29855796123914957, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.17992187611846544, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.17631896061601307, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.1602999168643203, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.14610129766068167, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.11719542744488858, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.5176576778046864, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.6613069296035906, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.6510651028008023, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.7441675183592714, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.31148557892944495, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.3867464805058363, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.3688189810109343, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.4691915858995521, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.3580995277077084, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.4302234254773672, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.009873841081506156, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.0465791689548439, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.11175354227446688, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.3298241713743613, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.39245477087067665, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.5302932215753233, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.17171159782066198, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.4250539223489113, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.1718526210271666, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.355144746174474, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.18597468573870948, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.3585545206106179, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.18945235333331134, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.42435548400666784, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.16446110196634803, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.33797636030209444, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.16832254701348195, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.34172735320777375, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.1663768231803161, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.2598332226932194, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.2543269102041851, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.45657979346800803, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.34876164625175754, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.49034326628094554, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.23377734254023033, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.46390013598008395, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.6066709784156303, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.5563610896532531, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.6234143352599183, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.3203505804334101, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.5201747871509939, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.18367196864850696, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.36528750965938445, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.3489571331320156, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.45391662061595967, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.2684337672133018, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.1642384866095333, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.4097157364333445, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.4592265081063172, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.29896910170538116, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.41423657620328247, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.1524830877429947, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.20357681430297922, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.14691264038600516, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.3211565002110335, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.5113942347100009, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.07113990450264403, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.26718576718028936, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.1567864814929981, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.1556051570841198, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.1556051570841198, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.09802862511942351, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.18018110949421698, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.09429247283039355, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.17793742526979547, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.05928666280553014, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.13867167773951167, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.39615544682237175, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.5208357532544405, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.09802862511942351, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.18782383079014037, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.1414355019095281, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.3460029262848581, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.08747015602038587, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.09252921910030763, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.17272055181675486, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.150668838416559, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.05963570658750385, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.2115339554327366, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.47475604110292025, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.6309823582859546, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.2685172542195998, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.5385038880406502, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.25284666735526534, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.48906659909069483, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.21208798872596596, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.44139630621320486, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.34535868670642117, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.6504104766728446, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.17281509196575948, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.46979670326158635, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.4102495208939548, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.6896260480312464, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.07209117403380154, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.24854556744215647, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.07209117403380154, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.24854556744215647, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.13801642938000075, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.05278366236265346, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.13670933737234986, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.495122539653757, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.6951565410196323, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.5067058201778797, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.7186203369024515, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.3537581250657245, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.5377035718944764, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.00615279366159919, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.2738643930663906, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.5091945154151882, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.0062763757184508785, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.006004997316698418, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.33636994463223036, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.18559542135951204, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.3804842882867387, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.35369375385786006, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.13087682931309413, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.19462952976787054, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.013538497707846785, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.3375178721974199, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.4840651438641062, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.3314824344065458, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.47145091131338446, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.20972571494011877, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.395894071208527, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.1484131243041233, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.37025679010586365, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.1484131243041233, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.37025679010586365, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.16678872216161894, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.38156158663679846, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.2312892590926174, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.4296420129947672, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.15082713742973322, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.3965911699770542, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.26279137685269766, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.4760220740362435, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.18928475425929295, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.4916060435820526, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.21940429389247643, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.4343280866601455, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.3300025916068812, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.5052501972629104, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.16772893302667713, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.3631993115371921, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.23271849054299032, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.44780782224814397, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.1824401863423467, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.36709433185688595, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.3377854698776805, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.521201229892482, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.15122189206102096, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.26750110507308866, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.10759927692349745, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.21065794536310511, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.07843772989359644, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.1324578891826276, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.08163977068875294, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.16171314933474576, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.2876418495512317, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.17431417316164047, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.2946006716848339, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.12475846123062707, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.27823340731817514, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.21028610725809627, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.2077061256651276, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.14134641571854575, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.22948919855739472, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.10227637589934795, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.2639843034394545, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.10322985794794913, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.24491122482530842, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.17600429416656618, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.2712560798864272, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.21268444697113978, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.3229997133764549, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.1475503033983142, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.22104108935973044, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.24911274612875411, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.3603818786794888, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.25558860194930705, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.36153214205343004, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.16590723086720205, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.3048601435624646, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.10085167559661873, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.23831215045289575, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.17543744527808774, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.28201016956553354, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.21685485833927476, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3714219747170047, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.12274092982883021, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3385513651938691, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.1463197333291977, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.366137273378509, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.12656494026948834, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3156355830822428, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.2496001284078824, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.4448983035850296, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.15110567441923345, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3452827306773606, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.30327872414714485, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.49804213541579834, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.1308672243682429, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.31750364852389135, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.1308672243682429, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.31750364852389135, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.13237645860785527, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3818322535970043, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.19532265607774463, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3705900176275221, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.13937542038981274, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3894719473714519, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.16091123830242154, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3479090205359599, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.12017396628208415, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.31451281894570277, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.18154235663145316, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3906877817743504, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.16701570871784516, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.4021286881032558, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.17068857826795375, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.42259904419193595, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.12489558671106761, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.34127082262151476, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.29383139922210444, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.15799783604363904, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3949243937510492, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.112289032173749, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.17726100052085036, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.31017716089889963, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.14276716121505195, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.3191375424862687, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.022303919896869945, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.26765226160005684, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.4201493552127268, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.12503614625842938, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.3600940511104839, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.12787395553510186, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.33752742535974617, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.13628770358024436, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.3557485348784759, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.13628770358024436, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.3557485348784759, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.13181313433495553, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.31758120882708796, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.10575365422675637, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.3112197010206788, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.11873119582007514, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.2738621723440972, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.31669205297743036, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.2662775444898412, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.33753843688529356, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.1463197333291977, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.3593717322097392, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.1486872032633242, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.33234354320913057, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.11809858631445573, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.32136192142934833, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.010176705289341573, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.13628770358024436, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.3124983184732695, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.3710595252626966, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.21688283061839067, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.41775824162589076, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.18235247300784824, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.40779523977234755, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.013915288440632284, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.1949274977582591, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.43455936271970363, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.27011759273645686, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.4427599081001661, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.39962545473912425, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.42454130154428354, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.41372838762491687, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.39112369376374106, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.38174551465681344, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.3685731277721772, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.25299682930744943, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.4798320133488269, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.22524348812603687, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.446814197726471, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.2202248274013358, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.48474965676300186, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.45813938111627356, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.2519921893632723, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.5094822088665985, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.40467880605321677, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.1593344703029041, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.22494952618128455, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.4760660341798742, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.12666372160329223, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.2650373529479294, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.12162779391619735, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.3228288840559658, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.1649662542496744, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.3466546857451185, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.01536966738773372, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.15884362032938681, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.39814998955321834, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.2808927299109351, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.14326513489612383, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.4034278533385552, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.11900012437357166, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.2764563185977449, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.11900012437357166, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.2764563185977449, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.13829446068705525, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.32059338352121075, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.10814410080481558, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.26882333913101714, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.13110409944926496, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.2967415176257371, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.24951692246125404, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.5427942390000431, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.18357153089008083, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.42052246538746396, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.16521691795932783, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.4134512022176617, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.12366644075037489, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.37651342775995167, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.14937342175323892, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.3978589591029684, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.2808463392632455, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.2962222000049211, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.1971903602140518, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.36269646528997446, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.203264842568494, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.2922087191170089, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.18237599479708327, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3740403511567824, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.2244748716483542, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.007281906895508523, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.36622684113978327, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5409038673960387, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.26632240818317526, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.467076790922237, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.18294117097472648, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4383387744769579, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.18061023425907288, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4007053923669194, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.17504630199215807, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.38391258078488993, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.17092467746295725, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4340281226634826, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.17730543118229922, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.40481314196782764, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.16027177058640993, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3907258279276635, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3857801012055859, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3917239471102329, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.22860414459682069, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.47331131010100724, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.17200673466668953, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.39948318545775324, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.18214748123680227, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4214655347512288, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.16554857129611938, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.41162096051974856, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.10553225565626573, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.1763116500850642, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.17730543118229922, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4201842844735916, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.1740044679403827, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.36375152376157177, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.12876689524369925, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3253153379449275, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.13269353024089545, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.28998089836851504, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.01357525601063516, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.2592282745024492, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3913113624869553, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.25387990321843446, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.4061799423946215, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.20298407172594946, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.427376330935813, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.1903260814647347, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.2078396213395674, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.382987159925022, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.17155797860081173, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.38967832102616645, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.10669759380473058, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3168512125430416, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.36557640203818875, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.0022163120567375884, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.17558199612672082, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.41334979014850587, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.24318848592140954, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.501343318078065, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.2759106084908679, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.4112997822588551, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.11436433361427001, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.31764537430109413, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.13784906211485343, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3161105981607342, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.322788951728102, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.40263021320001785, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.119159749312327, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.21297942664093145, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.1405026510197826, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.24785258181936404, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.022303919896869945, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.27915769824643066, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.3696324209636866, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.3882967156327901, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.4287492504761661, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.15626231814206226, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.2918712789926548, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.15113194654882217, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.28343069346633953, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.15113194654882217, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.28343069346633953, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.15325316503089068, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.2756316951639811, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.15360664677448232, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.28023683151672674, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.2040828288243841, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.2778060655126336, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.2851797100495641, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.3210853623565359, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.28306950244125495, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.15626231814206226, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.28318293039009146, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.15360664677448232, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.2898716122200844, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.14063630555225284, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.24531520458611372, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.3264287329357334, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.41662443172249786, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.16925466459550803, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.35912398848424326, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.2036348471340078, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3472831655579266, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.21547697432588886, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.18039960295364865, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.14645066834461026, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3483479830313014, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.13602652550459576, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3002035243994678, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.21397099133614067, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3568171392601981, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.15515668623277346, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3182263053072105, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.15515668623277346, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3182263053072105, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.14134641571854575, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3078571099929154, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.21666466990625632, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3532993717654341, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.14617001423766676, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.279165484305884, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.18061023425907288, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3684591961152223, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.029026566383552038, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.15658994837053716, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3084004707364603, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.16165057948216605, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.35172210628524053, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.1645494395423276, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3395039485255903, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.1762505702729762, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.2840831394823574, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.012458960343878354, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.20053583653512705, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3585550644386862, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.2327080490816513, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.4213315211213489, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.24362353508932386, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.14005830765988142, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.28271314565258726, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.01943377856541192, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.31144493768224313, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.4723803506062751, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.24250789663911215, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.44127658727390434, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.2298971389591186, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.45764667682340326, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.1531305080006172, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.3836419455857873, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.1531305080006172, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.3836419455857873, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.22183437291807073, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.4690517750319636, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.2327080490816513, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.4435738303024312, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.16744134956159434, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.3646163103836821, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.1743771229292808, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.46893502773403367, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.15808694325739014, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.36205822086931505, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.15089318423122547, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.4354703980715437, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.3931382365355541, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.5429286385993002, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.2147829756231977, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.4454092411234148, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.16060122347918185, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.3962176890519291, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.007237155276460672, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_from", + "metric": "bleu", + "score": 0.22233922818300378, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_from", + "metric": "chrf", + "score": 0.46750271079023087, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.2568191876426829, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.28135849152758385, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.13585608692428647, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.24602093467402117, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.014749122939855126, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.2390985318008474, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.28355113133330917, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.2769725060346048, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.21361876575641728, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.2033571257639696, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.22585782564798598, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.1190636583579963, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.11020510700910162, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.2753348107620908, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.31634304472566677, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.2462954618610128, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.25512324153300714, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.23562397727650744, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.2390985318008474, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.006249447069096045, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_from", + "metric": "chrf", + "score": 0.17131793456589922, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.12832055613623328, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.19433944404681203, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.14482189302397735, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.2913876815877049, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.1217802106941195, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.19314598726036322, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.022303919896869945, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.26784884804296605, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.42827938621547884, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.18723860296151318, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.3744699636940152, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.3238973846683935, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.12848668968362775, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.35101287733214975, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.12848668968362775, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.35101287733214975, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.1361658548186748, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.3295167855876769, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.12053310055432334, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.3171578305540752, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.11735344762898596, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.22119961905555086, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.25753379048739855, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.42531177875262893, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.3043567522223842, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.12819825042984195, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.36515328991507745, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.21743769222637532, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.4131100936190792, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.15471428129658016, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.37972085537143463, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.1322148042039212, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.3008688616645954, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.11914562165195522, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.36399317085467314, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_from", + "metric": "bleu", + "score": 0.1302352098354987, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_from", + "metric": "chrf", + "score": 0.326035134708999, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.14410670132605607, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.24025207593480963, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.16306957103469613, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.14219389639501667, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.3550945020345845, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.013501937941345124, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.1383930915047986, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.35245472074054107, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.12832055613623328, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.239266757938952, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.37693028676849333, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.15815783405728007, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.29907122078670734, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.15815783405728007, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.29907122078670734, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.3791177761741048, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.33706259236262626, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.34848136198939356, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.3767372261720185, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.33711667257105227, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.3284649068899757, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.3397876134677058, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.15626231814206226, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.3835008462447854, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.14855426866172083, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.3526199873839556, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.009628007582726738, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_from", + "metric": "bleu", + "score": 0.20247469739337648, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_from", + "metric": "chrf", + "score": 0.4418847146430419, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.9199349282509897, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.9199349282509897, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.8827916928185874, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.9278293769424701, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.5919743410620021, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.8142101616656354, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.2762822897608569, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.24942094354139677, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.8293181259810137, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.8797044719652845, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.23537164857894743, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.689799263500028, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.6642718379939968, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.7768492311706325, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_from", + "metric": "bleu", + "score": 0.7660237942267061, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_from", + "metric": "chrf", + "score": 0.8523393041110139, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.27434065146872866, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.4551761513917315, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.28112283847231073, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.2624404628276128, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.2044894275649509, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.28937737330135566, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.50463558154729, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.12212865548711085, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.32830246390851203, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.21688283061839067, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.4479129164961325, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.09225671760412947, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.2523418211331404, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.09175084684048486, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.25022921632561346, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.1423256407233325, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.3890910518336767, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.2834052290575623, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.44636998325082133, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.16809016325262707, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.3578360181550615, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.3420291337705514, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.517580206652821, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.21893304385292742, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.24797984721910182, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.4497423075151473, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.31443515194397026, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.47095169791332614, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.25571867215049665, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.4214364635384463, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.09975648968250406, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.31737139925438795, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.15310672854444382, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation_from", + "metric": "bleu", + "score": 0.3863552232164501, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation_from", + "metric": "chrf", + "score": 0.5712128723523814, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.11460384138378832, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.32308861733051, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.1308613527030366, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3063146286877558, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.15082713742973322, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.32932326410706136, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.1327211341271203, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3314509193319989, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.23806227267122468, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.43401560728128324, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.14062598436731893, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.31701958535647573, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.2112174444529806, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.37115876147810895, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.17297486233533188, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.17297486233533188, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.15844990886367694, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3420386248472483, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.21243241299173296, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3866454943966844, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.12008699630291321, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.28909490209819155, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.29329619543305546, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3509216685503233, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.1357521816192783, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.2572585481186862, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.2045516326940124, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3580560102192693, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.22103128459583973, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.23814249159794534, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.11682130307923512, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation_from", + "metric": "bleu", + "score": 0.12966020590511304, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation_from", + "metric": "chrf", + "score": 0.27356002971987425, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.22974300992320248, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.35766721538849355, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.12787395553510186, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.21931515993565381, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.12066241764747698, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.27545568870085096, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.019283248858266676, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.1077448900968642, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.24768004113341202, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.10949741420907307, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.3143619842598707, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.12030921204016166, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.2935108999290831, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.03466486739421709, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.03466486739421709, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.2136628330685448, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.17681988003060095, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.3160912867877847, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.11670085450653153, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.26006787116266133, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.31285104538031994, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.4986402374312041, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.10273756991976558, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.31893745331776113, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.2130931749764531, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.34386259606696806, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.2912425895319303, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.44041590401859537, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.2741063450190292, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.45913091016192176, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.10471800982955518, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.30028124645625864, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.0994331256564067, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.3083809409945523, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation_from", + "metric": "bleu", + "score": 0.20416354003164872, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation_from", + "metric": "chrf", + "score": 0.302007087079803, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.1812045836887171, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.18649703687001343, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.1441966459257424, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.10163106686838855, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.1449839903475139, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.011839425862120785, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.1828304652414782, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.261122481009236, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.19316111794720275, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.20236126962624626, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3258121781111335, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.12832055613623328, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.22099011517048978, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.12832055613623328, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.22099011517048978, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.12217624912667482, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.22607924847614314, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.1242416606498705, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.18436029336540405, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.08568635726825895, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.21591747223249003, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.1809496049496168, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.2213733761113641, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.14455493909089934, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.26045772569635717, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.13351234315884475, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.21097478779973527, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.1535686541317235, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.286595453551809, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.1258273118584677, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.23521228502721644, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.1222932912515144, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation_from", + "metric": "bleu", + "score": 0.27908865215418427, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3015846610603115, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.14288815197601673, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.14957316612525498, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.15892175003851755, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.12238991307064728, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.1802916852147069, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.2500534855326251, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.14580030099786817, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.22020305668868392, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.23870544239673078, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.30389718661514126, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.10277044358219119, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.11241969208680343, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.12217624912667482, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.2594914718095331, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.1226659433055874, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.2503423204189662, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.11758806799906993, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.19638036904069978, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.1226659433055874, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.21460383218365395, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.19454290935168922, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.2850090374144756, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.2741063450190292, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.3186528530268389, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.23870544239673078, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.304002814359566, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.1722232255922769, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.24223436609765592, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.19504951470199663, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.11955848575576285, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation_from", + "metric": "bleu", + "score": 0.24214093597439865, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation_from", + "metric": "chrf", + "score": 0.30667375521853, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.26958290276046354, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3771988116643981, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.27675048474641756, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3780460244391623, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.18915983539487516, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3233732862049797, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.022303919896869945, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.2511187197601112, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.4502536324640797, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.33668442350030275, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.5178614849361202, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.17081061355061614, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3645499017230567, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.17016486621490087, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.38158712387268096, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.17016486621490087, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.38158712387268096, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.17730543118229922, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3932130355670893, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.16678872216161894, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3637613572806523, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.16165057948216605, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3755941713865309, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.3999374157919046, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.5786384524608545, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3194362090066573, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.30611912992377904, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.5089658823760935, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.25666145410765273, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.42824842089739035, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.3492520394471386, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.4895757696342183, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.1626739260030573, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.4029875964220287, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.01218568017760398, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation_from", + "metric": "bleu", + "score": 0.28487594977117575, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation_from", + "metric": "chrf", + "score": 0.4793282150965233, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.2758862937563794, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4673996585329364, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.12503614625842938, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.20624064341134082, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.14539971733340926, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3142480613342232, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.013501937941345124, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.3007733426065667, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4585623736430774, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.18630405675789963, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.39138520850647673, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3615889761528277, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.14745425511282265, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.326627179815379, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.14745425511282265, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.326627179815379, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.1307524497557363, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3361579714658665, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.16237908184412747, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3670808372641936, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.12757247160029234, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.2807962808723839, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.13574363003136736, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3517512873782119, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.28191922425756916, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.2577701686990218, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.44408712841584985, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.20889290280122064, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4127473131540302, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.19568336427344607, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3996176422345778, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.21245704249360323, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3820763688368091, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.20666579426708878, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation_from", + "metric": "bleu", + "score": 0.2472264820166318, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation_from", + "metric": "chrf", + "score": 0.44131209510775493, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.28234422994155567, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.40408604199549997, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.3368893372278425, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.22475293380632405, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.4113282938664548, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.02586907629320447, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.22495022212432908, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.4140027812147465, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.22495022212432908, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.42230996708392093, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.20588815727980112, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.41944461991174653, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.22208402896971946, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.40708637163037936, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.22208402896971946, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.40708637163037936, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.17504630199215807, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.42263957382757394, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.22469734403789665, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.43058846803481615, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.11460384138378832, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.29561644965902306, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.217554942150074, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.4124786355218829, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.203264842568494, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.41138829710711844, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.29668873351523645, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.45439697849389765, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.3300025916068812, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.48848603918467354, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.3300025916068812, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.48848603918467354, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.23245687974745427, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.44333733713231366, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.07378730454743347, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation_from", + "metric": "bleu", + "score": 0.2147829756231977, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation_from", + "metric": "chrf", + "score": 0.461889639754688, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.1781853859048144, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.36865727091511874, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.2961559727627133, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.1087256678530004, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.2806568392288235, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.1117144649534104, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.29563794116222847, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.13836903384315105, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.28266806314793996, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.13588969750586194, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.3523239132597748, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.11609617787293049, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.3030210543724182, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.11609617787293049, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.3030210543724182, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.11595071162902998, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.23613460338414927, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.11092325635116752, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.3045281033610173, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.122897988281654, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.3280398584422872, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.16318495811623954, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.2996949855880253, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.16397906895258596, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.36106327720037035, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.19000969221027156, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.47485346348682694, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.14214337448390021, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.3609159299635901, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.12217624912667482, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.3477174082437146, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.12275983234737853, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.26198613287829026, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.12580941330293896, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.25539472441248895, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation_from", + "metric": "bleu", + "score": 0.32512365186675757, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation_from", + "metric": "chrf", + "score": 0.49575683948811416, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.12416350645592025, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.23265120010755289, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.12846497020051437, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.2670865602673704, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.12837839907779722, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.27407435258732404, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.01943377856541192, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.2707832297441565, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.3606120884713659, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.21222721003696557, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.2775686235755007, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.1315376612843441, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.3028123600861066, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.1315376612843441, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.3119958542179166, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.28157636825815224, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.1645494395423276, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.3212878804764923, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.20690996611611379, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.36853943211180007, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.2957851779920877, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.1340616911790049, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.2770773336013796, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.1380829650365223, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.3721998912104682, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.2602720291074952, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.12666372160329223, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.28503812950544305, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.13836903384315108, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.30881683598641113, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.2181762689207584, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation_from", + "metric": "bleu", + "score": 0.16091123830242154, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation_from", + "metric": "chrf", + "score": 0.29683125676353944, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.18772266185346026, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.4516796575038181, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.28252374116432993, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.3549531183419122, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.11203754340102182, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.24500939878540784, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.028862054978314452, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.3147237572518304, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.49783234003172966, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.22987974754633594, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.44132234050227787, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.217554942150074, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.4362387654025806, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.18580429349907815, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.3794437278624784, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.18864182094196982, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.3794437278624784, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.1457751611852363, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.3555305489093387, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.1761368775330164, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.39519338190185777, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.13181313433495553, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.391804404659857, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.3621795144956003, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.5514543720034338, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.21311107970920584, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.4388024026143886, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.2567411255293559, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.43316022307538615, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.26845008380756696, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.4949662669458603, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.3829453159673758, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.5336098994428017, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.13600287923663476, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.32363483083794653, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.006356553689956574, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation_from", + "metric": "bleu", + "score": 0.14219389639501667, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation_from", + "metric": "chrf", + "score": 0.42294374285621605, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.10531636385748798, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.15775047351971955, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.26128489301072644, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.13693974024600017, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.24848210874138496, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.1967909040251079, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.16079298647562637, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.25931919600838865, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.1322148042039212, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.31233141405348647, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.12876689524369925, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.3034375834959013, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.21189649845220918, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.2551309002082233, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.1331440297382392, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.260380807406192, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.12235107682054053, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.23169643452178718, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.21608366110461316, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.22194631082821747, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.2586080894830298, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.19814442729413892, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.3102637353553794, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.33957218715482057, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.1331440297382392, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.26536762328710567, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.1547905499593561, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation_from", + "metric": "bleu", + "score": 0.1797040059786851, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation_from", + "metric": "chrf", + "score": 0.31684376069016223, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.1789898550500511, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.3097165910502381, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.2126837065505244, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.11378142777276677, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.22340791296245502, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.15292856632736312, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.2723711009321726, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.3950616413683569, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.2590721086053319, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.35495860162749965, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.18285404868730815, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.30239028036773985, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.1736086198203101, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.27430743839572425, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.1736086198203101, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.27430743839572425, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.13410639648320277, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.2766307218364423, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.13009868453084908, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.28428711747510377, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.1077448900968642, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.20612085276116188, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.15022502107020383, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.32915753814356496, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.1315376612843441, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.30125329611269197, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.34661236387446376, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.460219316901126, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.1665583359843711, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.3065906311539413, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.15866683249809552, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.3074056285119982, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.16969453584247343, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.15533586874332386, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation_from", + "metric": "bleu", + "score": 0.1629119279942046, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation_from", + "metric": "chrf", + "score": 0.28005921777118686, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.15742302643532463, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.3220278551038813, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.18889796346849766, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.390828018955539, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.1484131243041233, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.29204141824583923, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.019510108479333106, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.16165057948216605, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.41967247126395896, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.3351318042725036, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.5017887940611191, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.23292164090728384, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.4807364086898486, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.3332385910410069, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.3332385910410069, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.12690518984438146, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.34504023491572783, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.15533439104366398, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.36668028585184187, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.10487303092333145, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.34114139472506677, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.1792939675839681, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.4629429664491923, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.16023828054153882, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.4268341897125674, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.16605519952887438, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.4509175930579823, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.1453056698669881, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.39967038005662203, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.3810271038049442, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.5487840152975325, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.1580433751277861, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.3469496558541037, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.2533911497972673, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation_from", + "metric": "bleu", + "score": 0.3713354961020269, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation_from", + "metric": "chrf", + "score": 0.5550866336796069, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.14855426866172083, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.4089031318363594, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.1736086198203101, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3517982963278223, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.34932113360659606, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.19118896363692645, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3809795532418233, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.35069983379310715, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.16027177058640993, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3978173363117612, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.15404632289830114, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3934040018417114, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.2467667610422869, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.2467667610422869, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.1463197333291977, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3917791418162285, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.18164305788156723, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.42992191257005574, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.10629480219240392, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.2893247452861206, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.2327080490816513, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.4453208003026105, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3391544706988281, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.2577701686990218, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.4782520457453995, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.20868721961570674, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.428749486637124, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.13948420003997705, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3744296785576754, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.2766362616814891, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.12718016030558363, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation_from", + "metric": "bleu", + "score": 0.17600429416656618, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3699002945708035, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.346045680932875, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.4621083930255766, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.25751650996406256, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.4024993164207148, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.2235704325446919, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.20771980047219452, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.42566629822564495, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.2387386324204149, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.15606652450871636, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.38676973597326414, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.08842019030033135, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.27025449589526984, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.08842019030033135, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.27025449589526984, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.15022502107020383, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.35815688949507335, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.2012788513843773, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.4158992611710376, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.10281312610906089, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.2831556428398908, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.1717286307100586, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.44149458986800383, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.34270506983355076, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.2569630819965695, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.4638113964856152, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.3110277298634108, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.4544814134892622, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.27645608560303897, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.4775117408731701, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.09296917759336987, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.29970256002116996, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.1514798524753532, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation_from", + "metric": "bleu", + "score": 0.21085288029061555, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation_from", + "metric": "chrf", + "score": 0.4495966880080192, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.2873180113751827, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.3691291664744644, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.21636840076404606, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.19001225225138996, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.15902128868434096, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.27967227996169636, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.26474827115975164, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.13628770358024436, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.30464291275706445, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.2685078959784482, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.2685078959784482, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.056621705833762526, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.20663551397330182, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.08335112496018125, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.1757222237049662, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.04743571775734884, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.12540743214067215, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.15799783604363904, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.3478409813873873, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.017418423019324433, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.1342816454725345, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.2947742083176782, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.19968127388777596, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.3882998250788871, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.19861626912448344, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.3882998250788871, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.22685806215113302, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.09083570416152802, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation_from", + "metric": "bleu", + "score": 0.08340582868969061, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation_from", + "metric": "chrf", + "score": 0.24375361043612415, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.18582826054135923, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.27931164611946097, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.13628770358024436, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.196045070105177, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.24882723725145164, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.014111660160258993, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.11276958806531856, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.2856161524537875, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.12692707541575554, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.309191449874187, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.29331713922012836, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.26291153755861957, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.26376991116778054, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.11825188614600338, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.309191449874187, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.1404678994830558, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.2962148768788933, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.22350242251322808, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.2812642831742161, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.21676612877798807, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.33382277849238146, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.2689798160106529, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.11610083023532239, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.2862219874694651, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.295153584471285, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.05338400788913371, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation_from", + "metric": "bleu", + "score": 0.15069228960836628, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation_from", + "metric": "chrf", + "score": 0.3780369016923967, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.1087256678530004, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.1812150267056357, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.09624090077172921, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.08196612912062277, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.08546743910655354, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.009628007582726738, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.1803546807787764, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.13825905140405212, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.23256631645019393, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.22256474447332572, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.027558878227695063, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.027558878227695063, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.10539140971370214, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.08170791430618002, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.05861911252854408, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.11209478008784199, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.15557700252716788, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.02731253201398055, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.1612982609267219, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.15080392997556943, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.027558878227695063, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.1476996786071879, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.020035925770441693, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation_from", + "metric": "chrf", + "score": 0.14949577610607986, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.08206174754800233, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.24600065227133203, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.2894206730562163, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.130697570216376, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.3655209094420809, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.28364158842913945, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.28212742472349317, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.013800067168664049, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.27274442393032494, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.4767475272675149, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.28669684236421306, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.22839803716503337, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.10704604894593339, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.28394660030720387, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.108043996762779, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.2946349659761032, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.02321338950356412, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.18225955227302953, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.4400405224814026, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.02340652500728732, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.14868720326332424, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.31851758158862814, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.16574624158406068, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.39903127217688206, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.3672664653484505, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.23788431027756934, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.17441676789914212, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation_from", + "metric": "bleu", + "score": 0.11047111196276078, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation_from", + "metric": "chrf", + "score": 0.34052178710726805, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6358921902612438, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.8041899227402122, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6299285159340671, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7993134129243716, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.710159574003633, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.8462481747979111, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5487830136896633, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.77238965036654, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5375319287331519, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7826716936890488, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.48244543610473856, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7294391805717774, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5745954681260859, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7920051188244848, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.473743611024707, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7204787094069257, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.473743611024707, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7212488881410557, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6012475603804444, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7990339788905771, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.582501414351973, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7832709523750881, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5293493442903949, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7338751118862769, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5938611220262585, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.8079474861665713, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6407610564169256, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7997703697716119, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6660677740125452, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.8293798371335214, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5718247506430171, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7570613392550647, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5718247506430171, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7570613392550647, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5934447341895612, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.8057808261092149, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6241924127610678, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.8031006153647919, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6372502110149713, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.8187019874664503, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.35059076445515835, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.40219803477483124, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.41316127706749806, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.4430321339435623, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3993284843242707, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.4224738565076288, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.2908087026261561, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3411361400094189, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.36861518849172603, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.385899929138448, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3504650671187503, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3985617531166433, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3572514590810421, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.40312319760122833, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.367359470372992, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.397303106677261, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.2940872474292116, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.37182095401206455, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.2996868226086902, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3355531727847081, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3704616277624811, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.4166020708416767, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.19450029639430153, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.27662361414224973, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.4317270147628918, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.4476891051893246, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.31869252626249206, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.36089356265329914, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.4199243020508202, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.4310330650643179, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.38146085172952343, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.40030269579783606, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.36258675084165565, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3828423818810428, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3146999839790487, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3609768423654951, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.2613520653232399, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.292974388325607, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.38876512474558916, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.41342876789412997, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.8780634320789833, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.926946700115022, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.7964573357809173, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.8458636471716781, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.9452996322890763, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.9463396364218181, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.8781548422306138, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.9156314785041992, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.8562379115188704, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.914880147320643, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.9878765474230741, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.9958930217841712, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.9878765474230741, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.9958930217841712, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.9878765474230741, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.9958930217841712, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.9878765474230741, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.9958930217841712, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.6061745621552239, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.7612297430473799, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.9878765474230741, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.9958930217841712, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.9878765474230741, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.9958930217841712, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.6537803976048806, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.7742226743967544, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.9878765474230741, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.9958930217841712, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.9878765474230741, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.9958930217841712, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.9577952806172931, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.9847675380468571, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.738238064391125, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.8637738769684485, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.40673971192998765, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6897190926100627, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3707525915417785, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6481906761834414, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4405434565828979, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6872423435487918, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.34070519401434163, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6376396416993303, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.31345365279001985, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6269464695155232, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.38318568210251663, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6625830408830621, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.35601247064914876, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6528728847159075, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3155065747070078, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5938531598317043, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.31724496872686936, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6080661572259668, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3535276144718208, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6399338911163, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.32909385533018776, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6587606187204432, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.348920853132259, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6505040271951762, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.42021658469726225, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7002995337928327, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4142150747824564, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6886174346652553, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.29793763405666984, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5878658443031616, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.2737856702715042, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6091441790112126, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.2737856702715042, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6091441790112126, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.2831940852570977, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6293179946715834, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.2060740184460064, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5566122985381202, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3764145740138264, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.660406350984819, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.703373719677874, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.7784050705257474, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.469958733898233, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5843756060033074, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.6034601376302852, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.7074074363255227, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5200692650497809, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6586847274336591, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.517670839326365, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6443244311395148, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.668895061203786, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.7856645013611931, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5724622291345857, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6818279156433621, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4211686427838433, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5788959327608644, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.3472652233717345, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.536323502860556, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5439803529976158, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.657598922173703, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.6910600271263078, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.7855543051722911, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.3051409552296292, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.522746034067617, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.731132155274915, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.8118306465406135, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.7618275371106299, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.8252860779969783, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5447800851151646, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6845859707632784, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.63457045351243, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.760139991277541, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.6515663614401863, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.7685095079945828, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5435307013262817, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.646576761573972, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.2747017431249852, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4456826256200505, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4834220366915352, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.678862671476654, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3489926819498492, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5715668842319502, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2786169604662155, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5267252236203236, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.35446322216812387, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5208748527454148, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2679728611808951, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.466691372759197, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.32445558465151153, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.557607437471968, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3842600770501223, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.6030559477915464, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2676232320051144, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5440246804235981, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2559102158652863, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5013387365293691, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2588692070329805, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5018097349874775, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2328598163544389, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.46604753989124215, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.32150635033114006, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5479695584127602, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.20024611058462247, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.450999802118824, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.36488083606907545, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5811650865491297, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.31876018479642804, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5884644303994607, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3249989390135794, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5532261012182782, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.31400830186120793, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5436299115609682, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3104399267861846, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5390234949668378, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.21732968497953328, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4683342448941504, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.180038135256147, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.42760668286140896, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2702404890575711, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.505948742808373, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3410244689880313, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5816669416914216, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4234343012313773, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6625289905598352, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3885765192359091, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6554470157301392, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.38108864298853723, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6485553379227472, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3602811871213193, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6460863746215998, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.385626093679484, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6500036127408896, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3765213224289163, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6469521424555786, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.38543162152810245, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6356972128878653, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.38543162152810245, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6356972128878653, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.36247466608675993, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6011484151165629, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3037425321349514, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5893368096126496, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.41616378505554413, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6576322811713005, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4326013853051836, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6609703936604562, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.35104428234941937, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6071559884845871, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4079926989572759, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6592699047005666, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.26075652499067425, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5605305670545515, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.30994878928868236, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6006061482856062, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.32013340270991775, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6222205233819726, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.13339786348528015, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.46778689835182324, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2602768294269028, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5310567541651178, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4324680011853555, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5877600878871951, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4493940083619696, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6230960824462234, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4116575552858724, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5665759692366567, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3533147318401534, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5972951640947346, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4771981137756629, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.631576374875568, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.37929353357736867, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6046831629978785, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.44219732271776674, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6193429426274062, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.22913506273978548, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4939257941082402, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.23006243247829844, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5109587983295097, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.41852674506584964, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6035836275599532, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.47174948050634596, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6205798681912935, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.38750084779193644, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5621267916844479, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4839792901878845, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6380327835059832, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.5690535070317115, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.694401643850368, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.433056028408153, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6068335862669254, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.37696437834356655, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5617832488367239, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.41601295722853754, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5905243301789833, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3175167304634487, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5599612274230438, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.1810501938660849, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4154005351684647, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.5018386916018573, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6673891538739279, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6947677373756656, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7941300666655116, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6412098671661826, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7665040244283648, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6045639360711837, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7576570567798335, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5438238038060724, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7060850657954441, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6197738787215121, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7658055016335501, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6832136298239752, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7617777911358293, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6638859619095425, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7874224590682172, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5298074527457437, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6947127142026204, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5298074527457437, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6947127142026204, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6543739381048754, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7768522458527362, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.662399792163026, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.773536539587345, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6225849492542269, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7743614551597617, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6699094720554168, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.8022876242275274, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6967404588441317, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.8042365592031913, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5886489119980793, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7611944709376643, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6561309661336588, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7849652413082676, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6563682116572268, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7849957663929321, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5513991164786572, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7476990274059347, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.24311976929452217, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5332455436874994, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5821415139431849, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7329539842616807, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3861375213265022, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5122109329134508, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.32539921259497445, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5133457276293165, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.375079512706724, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5371301483272257, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.48456463733283883, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5906105668854662, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.40927553310427206, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5662949759228756, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.47662407876184354, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5978297795375753, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.4845227999608418, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5968050469845498, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.43767708225471547, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5832672215535696, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.43767708225471547, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5832672215535696, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.4494703452336724, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5696298539086213, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.5361224072232371, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.6206824468730637, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.40043696159688213, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5499753931711484, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.44981308897808336, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.6151723374264357, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.44690678721492927, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.6070649356517335, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.4295014616287586, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5957510678657648, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.47727404239076743, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.6081867525552255, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.45613271253639986, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.6079134952024788, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.29321566655721476, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4784943256117785, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.01656048993031311, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.11323797713183678, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.4124136266900752, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5714981155807188, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.4354194543126476, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6656175329857803, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5941815558294462, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.7983203558832153, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.4524360012660941, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6715594086896963, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3355428780074198, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6348737822745005, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5203658197535925, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.728651597345323, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.35291113737574475, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6466347966614083, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.47269414327373943, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.7451099574206652, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3316233171986411, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6272179443370408, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.4139457402861102, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6639608785371084, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.4216321717480383, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6897801155729197, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.294827064925964, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6191657516508425, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.4377505012720095, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6684596427144653, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.6036019309695121, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.7801644741948762, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5022206885761161, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.7105763508429477, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.43249073282965117, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6444897357478733, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.415412929081021, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6437233280372863, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.4847783843624222, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6873592111602536, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.2755378501368094, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6144463222131131, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.27979942810555614, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5926090214839685, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.2995846558655927, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.599404823793189, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.3983045920261205, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6521777108605036, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.268203877206376, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5791297455379081, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.22766536738739604, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5321260120854782, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.14981855747310632, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5073561650857479, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.37767129707620756, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6263797336995671, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.40263336117444953, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.656129562811693, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.4143449478847806, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6736569430464404, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.24638705113655374, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5790960837180688, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.24638705113655374, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5790960837180688, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.20430195455630867, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5336103118914343, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.4602159439834861, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.15626437596725912, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.4715421308516199, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7046296108422225, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.4775743055476471, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7017340115397528, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.38187621052323667, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6423162801762098, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.3491792142373769, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6341002242155772, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.34677612240149885, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6340001930650527, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.2631288867571726, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5489643670410926, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.001753155680224404, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.19348048287912908, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5498004622015639, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.17329990217896798, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.583781848253705, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3373556859342653, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6738789170291255, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.4059531821242849, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.7392702727394752, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.4658089028058827, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.7584810978753719, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3654687885151158, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6925292674757693, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3725685850787146, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6944058070280722, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3720001389308944, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.7013845085492982, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.2553027433060345, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.637042750920823, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.2553027433060345, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.637042750920823, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3649036594689345, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6887265942100023, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.361621938060054, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.7227161818129695, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.18533803782909966, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5623323442262416, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.4058364743511898, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.7590590041728482, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.46087901425957023, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.7574945544449315, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.33490167163730483, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6738850345838133, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.45475498440082013, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.7890162768101745, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.45475498440082013, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.7890162768101745, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.40994675674919734, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.7134834638811454, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.001410039481105471, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.33415579274035306, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.676792081658235, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.4446730260276365, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.755908468739292, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.43478132178539325, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7018196083546635, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.3877278798081724, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6833268596614586, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.37529811264444257, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6378039240510625, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.43482851137981304, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6920474811314331, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.400011517795393, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6786441872043172, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.4494597917400064, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7073438158390085, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.28932340357490705, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6127615085528726, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.28932340357490705, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6127615085528726, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.35036401492739216, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6426566938482169, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.4754775746253918, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7412539060902723, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.42540540518667613, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7419806831321638, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.511936592363045, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7671481459096147, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.46969023925544096, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7315431192429768, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.46625439194767143, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7058896647604742, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.49975754725037047, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7306601419467481, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.3862844443359411, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6747509426896976, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.17314327152587822, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5307830562814513, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.42907605083910527, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6942504376084577, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.28038937103419465, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5038494750471553, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.25718495991757767, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.4369258127692308, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.3035527425754751, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.47611727660210634, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.003762227238525207, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.3457321730564948, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5434124381049174, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.30019255581073173, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.49869889490439867, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.3268845394941929, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5177343510524726, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.3014338594407141, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.46425589256856287, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.30240075980371306, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.4633715711061665, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.3293368889413212, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.4868090313482157, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.3090542869620925, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5103110435016267, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.28680063574151565, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.4509415604815221, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.4592260218476326, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.6081527520886763, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.3446713174282172, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5204675442759993, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.3597572114060291, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5404525494108054, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.3276986983391062, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.47017897902052863, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.3249152568205164, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.47768812495239976, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.461854840187321, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5927663765598625, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.24830299714202062, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.39688820338442954, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.31950891970955725, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.48966432562692086, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.2989825848955476, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.5979372541912472, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.21832405319137094, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.538599666863523, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.3115768268340918, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.5357057702068471, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.2586945044297987, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.5387430998949699, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.29339572010667503, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.5936892246932287, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.28064547347009405, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.5180716871987625, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.2598777182656361, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.5891629072611128, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.16456477665946817, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.41285435919616165, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.16655986548078744, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.4124447084704064, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.3036094747298954, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.5533394965317399, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.3720337795006881, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.5959145385593907, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.1533428945524185, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.4257635310750525, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.3182748865833727, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.6082918458674967, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.3516027503553436, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.6195701584804578, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.14296936488768364, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.47550381383139706, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.40965066226588226, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.6492365059868139, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.40665812654558303, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.6336046294846941, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.14016942957305434, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.4272307711607646, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.1760794779214713, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.4575853716562964, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.33403393270409815, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.5656795429894189, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2607899218485117, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.48702498962539814, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.19112125497758137, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.39870104995538236, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.1905322718463843, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3856189128703021, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.24830922800703137, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4179013560771621, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.14715094360492556, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.41200144567386, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.21926096955147786, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4167523577964257, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.15922876554252086, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.39205452902204047, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.270143509661696, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.47453394890664635, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.270143509661696, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.47453394890664635, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.29752721412676364, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5014416567505952, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2251933631863803, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.48308242360890785, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.16082381009791424, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4142991984214184, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.29461544890303853, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5209091490119253, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.13140798376069315, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3808902515851829, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.26065672870526985, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.48902571210477214, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2703464256217121, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4887431068878756, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2564158056447365, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4985422502909961, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2362210050289396, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.44300184014250993, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.09001281287166815, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.29678105643425495, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0709787992575099, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3721953235737691, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.16158718602920916, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.529420842984689, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.11815153359326112, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.4449148298646824, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.1643968721046963, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.502337586581055, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0665116325665265, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.4270729969076189, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.23668163623735117, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.5999794441735549, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.10479477169090108, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.48965200604299275, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.1061903685456685, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.48402103757719056, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.18211387083835692, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.4986662704036846, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.18211387083835692, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.496627095959543, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.16675794580737027, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.4319174282684955, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.08041822969104957, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.39054133168714267, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.10676196248444668, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.4217035289368706, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.11372855109111697, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.5442956478878838, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.10409229950151488, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.5230723702949467, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.13834666277961913, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.5224463124365963, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.12602284615352088, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.5260100533464075, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.09821580391129837, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.524129220630266, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.07559167521278282, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.43978735666129, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.001993620414673046, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.3966243116248163, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4280674425361325, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6762864074630727, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3476608425783186, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6262956160259413, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.426434068388067, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.692779018863909, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4484733265095632, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.7011327081584477, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4284560652173415, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.682978489192362, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.42432815678137736, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6929325034163117, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4840168429540741, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.708011141564384, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.38337199664693017, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6131504489318851, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3479593282299806, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6085432841999066, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4860845405802908, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6654842087025501, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.39598369855377885, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6372978936473155, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.13243406366000607, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.5138012833661083, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.7611872892611777, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4338915749119648, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.68146231376533, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.40431833568589515, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6726559802484742, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.5245557811520258, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.758483386076469, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.5245557811520258, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.758483386076469, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.40896014720436774, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6783188537179585, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.05352869081949678, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3819546890461068, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6360111897754611, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.31513262466552094, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.6088063321214817, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.328480036965797, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5845846160692275, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.37255875421707185, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5802452418652684, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.31783906024075725, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5668993446336066, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.3193473733710768, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.546188037439081, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.2764303380184275, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5419234868461389, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.260560100033495, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5367621571346236, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.09806297894460858, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.3903932427657001, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.0959650718435203, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.38796120798722006, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.18642242278321888, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.46911664367808803, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.3560731743608624, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5863868433828183, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.24628449712479664, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5007787042784715, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.2926638002566019, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5719277380611922, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.2560601537516366, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5528416138114, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.28831927729808415, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.581119580581157, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.11379991855874161, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4351192402449332, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.36838655253702773, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.6229761260411643, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.20289080919028094, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5022073516929659, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.0020798668885191347, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.08765181859659299, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4198399937062217, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3357114888560433, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.577627721559994, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.165917178563774, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.47788177649200114, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.1589577308024652, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.505896276887539, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.16477445466487844, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.459819964261446, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3834925243677549, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.6550089441848741, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.2082930998913907, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5532091847800142, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.34818832435375924, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.6021958119247758, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.27581307652167925, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5553865940438732, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3223396024323621, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5912894027026766, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3146657215630817, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5835370042710721, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.35158019441290855, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.6120838595405107, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.16133896524621585, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5552482281757406, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.42784086863443627, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.6882160381334613, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.38591922147783786, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.6582794169866106, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.35070110489424783, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.6303302920665712, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.30861758644255877, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.6190092441225287, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3576059081620552, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.6266553980737001, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.18163638157001039, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5268292196755848, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.12478244272048393, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.36067860302550464, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.2869154404670116, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5724031955301535, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2715808911127579, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5912854463960201, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3561131937728394, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6360822051576717, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.32531817821640663, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.60401575980519, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.28671896452833406, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5412439325599205, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3716909306227889, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6400806371173305, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.32148367781910586, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6050096649366548, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3962150993148768, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6615791069918547, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3461149674280951, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6080288192802612, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.326661108144707, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5894586069082428, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.24809748407938403, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5410362169381645, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.20939224238332071, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5362745679788242, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.1910176196331082, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.34484093017132394, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6395426495902622, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3483096250476608, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6318122440778494, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.34806159319735747, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6310588996276091, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.1329195573506788, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5202656846184245, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.33103091910045723, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6237034632133035, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.20767811562631466, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5422928733421603, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.0012236906510034264, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3700647909339469, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6502447445733328, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.30006024473641996, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.3256826624869121, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.20022216695167708, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.25475796453209737, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.12506045850590283, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.13089074029559336, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.300747771019676, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.3173961462660331, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.33286620570997505, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.34215490607059196, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.29527663375827945, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.30142023298576054, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.30843187897727053, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.3486791970487533, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.23332061635710627, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.2566419801141603, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.2405665302731005, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.276111632132572, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.24952367521090352, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.2609799307655936, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.21308267744008783, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.26013548835400724, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.17230159795640088, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.21902099386210508, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.20712109612877888, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.2716500132697994, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.40137962383426623, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.38675685523696335, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.3893349932266618, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.3929514836681183, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.4198465413343155, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.4220024962966257, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.44578201596167943, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.44136419881837663, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.24000057851120946, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.26789658199295885, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.11631906556464608, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.25578558071981117, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.27270737715571486, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.20138961649645912, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.31544376598852375, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.18010353259801426, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2514369893270279, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.11110503478177579, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.22819017092278873, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.005980861244019139, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.22605108973356855, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2500902724267561, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.24058562078978266, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2719774362951576, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.23057388791871672, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.28722063151007776, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.18087334221478837, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.18087334221478837, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.2316754903737317, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2711134955494406, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.18922003884382727, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.23842255928924005, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.14597727071242525, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.23275105527196266, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.25726126471723726, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.29380082192915835, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.21700184332800349, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.24253963535587755, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.21641035200842274, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2441782821829483, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.21649213316213017, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2412402757213432, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.2265279670953065, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.27640631169553315, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.1580890560041312, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.20972147530113283, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.12340397529268728, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.20444453862205114, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.19611486182177768, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2607156802280298, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.3575554083195387, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.6357905858551194, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.38341396472331346, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.6437362313865919, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.3554972580728409, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.6081126361279929, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.3772896178241295, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.6184118090739791, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.343875968447423, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.6203080869559495, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.36616653782818587, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.6186956523981838, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.3878454500259306, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.6551636735823242, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.4684056503030179, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.6820263032407077, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.4684056503030179, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.6820263032407077, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.36342165475997645, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.6210552995089563, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.3600088669321733, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.6231275718412072, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.23572077943091702, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5451532106521425, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.38833403743974904, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.6469273691949573, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.39079551128996964, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.6489080988628403, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.36915900473214464, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.6363403499615194, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.24929044937149633, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5801222490607234, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.343875968447423, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.6178241856628925, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.3567821215601251, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.6144568170893193, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.24235204007755543, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.522917324145437, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.25613866339667946, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5426654212984436, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.3212586102862301, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6200344079713044, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.29542603524950894, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6045890208480269, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.23349565416397317, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5824998907855937, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2520855787839461, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6062602861997253, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.38414552248057476, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6295439096190666, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2968684306569404, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5965916756005551, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.36350084013796624, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6256665663158201, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2789283308488842, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5934703247441289, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2931172960934451, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5995777718150087, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.20585702977424392, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5580925588942957, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.21826859708159266, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5755949064700074, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.21980750979637118, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5624240512886104, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.24845562787570447, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5720910327241964, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.3148545032694575, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6192770324499772, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2970503087292613, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5980643974127514, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.32937445908608803, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6217341259690761, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.3296849443412897, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6225198704824016, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.27017327254261264, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5963262712857498, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.0018463810930576072, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.21919094140903084, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5995081065956405, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.26045893297751727, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.4556002624646449, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.25769536520621106, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.4479264653484257, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.30518679384155084, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.49812450266220515, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.20790986473853063, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.43810810831688024, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.3840172214263318, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5461608432733371, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.27601292689095075, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.479609409219689, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.31094933095062055, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.49068855707946196, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.14924113955829205, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.3993127431049283, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.1608791791488148, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.4188174293611585, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.32978122911079866, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5164501195528906, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.3790340178068295, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5755612586705559, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.17526112080278783, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.41835629727825396, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.43168198777904704, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5878046851833884, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.27299273205354724, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.47703707799679956, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.35272604744531355, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5246522557616184, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.1776418682110657, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.4643539241341349, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.3150148909766856, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5091327700523911, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.2281330714542381, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.45039469008505395, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.03041070289802544, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.3554259247981793, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5460727299785414, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.33202003790347423, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.6008057921287521, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.5169325988486032, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.7139612818852349, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.4906167177513232, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.674851813552015, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.44703919955084814, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.6655582120379708, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.44929080614862915, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.6890808548811542, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.3609839853716293, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.6437289665225415, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.3833596037687312, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.6303767331020778, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.2787847531255836, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.5553047982045038, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.27255394270901734, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.560183488542227, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.4114155281543391, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.6302831368434185, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.4935048462514883, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.6943423484942555, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.1515895761088375, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.41903630508369216, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.4620212793693587, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.6929628328392371, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.4134846607579587, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.6553595923197755, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.42152926358064985, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.6613648437887835, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.38603411668768844, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.6493990798045837, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.38597038935683725, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.6451087674684899, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.3627829136084678, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.5991830267537791, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.0014492753623188406, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.3088416770749959, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.592900878888669, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.48468513699279653, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.627759796330558, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.30044261567697145, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.5072959120235889, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.3508149210340218, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.5412873655506261, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.5060360517265556, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.6405988583997455, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.6276032281768119, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.7074650729422788, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.5486141288222729, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.6319028690820276, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.5350553356274835, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.6061404961531679, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.3896845293886086, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.5335919786909975, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.33162808255530973, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.4841083339891595, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.3812595798653677, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.5540173999726016, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.281835618300566, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.4873620919042211, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.30276219275305616, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.4761465803265626, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.5487552982567756, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.6792265965211294, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.5508944416251419, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.6588533619086353, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.4935985544773577, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.5839815051859267, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.19590685591514004, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.5086260671383968, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.5746911772458507, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.6684658929287185, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.3281583483538118, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.426617457491999, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.15854102708982298, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.29816951752658855, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.5077054904758644, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.6413587197895713, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.10167233309625472, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3890033983715864, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3452100271202966, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3365774376014292, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.11212490144847859, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.35810167137638094, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.1530769479628908, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.38999571084193796, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.14059082344844442, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.36810674844102004, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.16111837221606704, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3897870969473364, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.06638808017417316, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.35588296621314414, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.06357046508728754, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.35389427159747067, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.09899176577073174, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3565436122576164, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.19038191039507293, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.44678045008872613, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.10551537698703499, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.35068439530150025, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.1965846862574398, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.432874358322642, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.12843357570124533, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.41736340554194123, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.1448369993567943, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.39060651030049987, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.2881123154846582, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.08452508050159857, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.34365275227384534, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.14889383875898862, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3891856763439179, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.0007098239636570131, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.07859507887728001, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.34793858380348625, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.3118864893794733, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.6025015600895789, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.4136053909671976, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.6322647535881054, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.32738231617619146, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5178441994716841, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.33239407888861344, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5517145772037151, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.38521342747439724, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.6429066044491251, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.29077749224583394, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5054867926717619, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.29945813065831656, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5652964318283186, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.2827954145826843, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.49852160493934883, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.2854020994711665, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5032170837254859, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.28872738709929546, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5546188227982165, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.3128156975833063, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5919269831732986, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.26240469087814977, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5428010725896136, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.37284549147595164, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.6659031225862503, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.3320217594322534, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.6192339182363553, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.3024784802029001, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.6011904529037848, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.14197937102005218, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5124141579106466, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.31655432248594445, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.6093384140863397, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.15346840690325161, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.4338689860644609, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.001621271076523995, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.272708665260741, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5856254575566864, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.26361261152036314, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.5725817956180734, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.1678792928110798, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.5011442124633559, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.02662451504569158, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.10736906463248816, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.24213116854522995, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.4880061744544797, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.34632821240024325, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.5555330144056625, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.30129541227386425, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.5580271496335837, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.2170322997863664, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.5313522319806132, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.19108485535364833, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.44330638580686244, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.2055691815843588, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.44032837442459305, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.11564238061024554, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.3946570545606821, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.12964303350477555, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.30757806042360225, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.045073226570505734, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.3072344833719616, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.47030940521087083, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.6313130600162312, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.426869842681755, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.6231228373188124, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.09626171192133098, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.4030384334867132, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.3040281643947945, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.5570113610277189, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.30553566538204696, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.5414414179536475, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.06634490094756204, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.3933989156814178, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.061647053376497875, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.1554907346188576, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.4319476007109481, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.31924314606108933, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5643587039540041, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.46465436703612695, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.6187472056628097, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.3106871546015398, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5481440119652243, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.3812607926267945, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5800153448910886, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.4214243411279288, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.6022332200292522, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.40881835715890286, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.6176764063902023, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.40476128586074045, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5819090999793647, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.31433899798154447, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5302700213057665, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.2500141193775793, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.4962715087072226, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.3219984326515652, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5410548940338055, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.35814583416540713, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.565376261866713, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.20366453872893497, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.5772621756887225, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.69776600214117, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.4597577036346123, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.6387649358449012, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.2906050917654709, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5466988553506439, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.2813982099058976, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5330166410689565, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.2813982099058976, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5330166410689565, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.3987369340280224, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5966252340653917, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.14629782696806198, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.37024417034105334, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.32251358317473644, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5152627703184472, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.14222805709331154, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.4996646755261327, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.19867232982300087, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.4721193400035613, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.1786760469371898, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.44033730462672854, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.11069519754858706, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.41088429499755413, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.18861865099009703, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.5044545567138358, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.1643795274638477, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.5226815528917953, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.23765992259974003, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.555280304898008, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.07475227349054883, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.3917102662716776, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.07591053074393217, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.3680248117024705, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.004206414136417992, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.11624166283801136, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.07951687821150766, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.1785438178241626, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.23167461084261498, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.5805102619993613, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.22920293798023986, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.5302965636981034, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.1782141139149533, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.5052627255314486, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.24423980006414553, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.5580259395074147, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.19726119412992152, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.527856705300845, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.3544044522309282, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.001968503937007874, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.19386382603148064, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.5282523988633897, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.30065550001026614, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.5078021900511434, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.2861877593848395, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.497612333201281, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.27099887927850697, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.49735991145649855, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.2269082917867594, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.41247660565256244, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.3564226671132021, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.5759796085098173, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.3209133858060272, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.5100662436119883, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.3540078976511945, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.5545213530835353, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.2118766389121268, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.43651139895807956, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.1995171046317962, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.4153129741061397, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.11756843853580158, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.13583465343472367, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.1603296850044792, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.37078357002906864, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.594665372689691, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.3656717313145023, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.5646486568757934, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.34558148449270437, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.5585506842803649, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.31930026329775296, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.5301066308219036, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.11123595447088756, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.3100651531983498, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.06183503355439259, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.23339083303276537, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.45781415663727687, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.3880515884750121, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6587916715823183, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5142726846179982, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7344716263345912, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6066498620510337, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7812137754227463, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4342750764549485, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7115011221714777, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.38939692979759266, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6629460107202892, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.305730257543728, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6305034408922697, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.28822910320599077, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6087031937056202, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4235901905046571, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6897643314228233, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4235901905046571, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6897643314228233, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5011893046413795, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7089203664957927, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5443441840177868, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7408492383397802, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4096284280333042, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6241258962315377, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.48186321118136805, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7050536782800418, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.3231928392518462, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6353582763408947, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.3406014428030703, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6514548680180557, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4492327786840591, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6917786880624969, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4492327786840591, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6917786880624969, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4682583023691399, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7090532819754138, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5105553787243322, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.44571331402556874, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.67235059873138, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.4487746167679644, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.4476730201191672, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.2836623400057614, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.29147337237183046, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.2775905064108025, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3165767280260291, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.28912432952036243, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.31119603942667584, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.36793172580653655, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.36238203264481056, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3652139786200916, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3824205406345779, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.4094748015187699, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.4288513205758089, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3613816886544421, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3747448802797138, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3613816886544421, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3747448802797138, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.35430370029300495, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3864890531682498, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.23877027184328983, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.30469172536496925, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.20137543786547824, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.27449035341376643, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.5702271610495845, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.5772088119985683, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.4054283523843365, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.41608551048864156, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.37405604379521823, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.409758558051675, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3835611536417376, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.41360439536029553, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.4416093606258131, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.45147509940396857, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3513670909441729, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3757759666240857, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.19333361726926898, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.28056620588920506, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3541652369790141, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.38739546241623046, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2113054108348111, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.44238229987470284, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.26207903587847736, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.50073123223194, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.16098073041469485, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.39710375075643284, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.11465623153412556, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4497512968651573, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.20350864435725843, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.42265642784768853, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.1866741141650009, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.46590330015791137, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.16950698451288215, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.48668984177868246, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.14357645942230385, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.42910391585605345, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.07762075642455304, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3650474475262904, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.23516650478671175, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4885052730214997, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2561279176480246, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5381058353070057, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.1343270650805756, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3620549501198994, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.30632520148437686, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5904333377596244, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2379983204095325, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.557901480722127, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.23477037244978113, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5165217514090542, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2711981710401392, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5555651822168547, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.27882680411780003, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.573831995268334, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.16424304323370437, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.47015962613702633, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.168777027092081, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4368481165562445, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.22415590998535484, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.49981791926190994, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4556160153884204, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6661994452325181, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3216756020053242, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6141241026166391, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.43369048469848437, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6586872889176818, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4098419224543478, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6358736384460296, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3916220735299235, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6550784354069854, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4310064928034671, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6681893438144855, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4527112325797497, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6708989870027865, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4656080680369174, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6768276245760189, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4656080680369174, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6768276245760189, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.403282335120862, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6319223068216205, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.43198701643600734, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6357518563774851, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4008289023648176, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.619515936983655, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.47486558569605275, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6916379662719394, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4194404958713696, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6621110611553065, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.44234482870142466, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6617260327319175, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.465541200947692, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6735988737803571, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4650859079784916, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6778737063187279, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.5018812162175532, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6982877049391611, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.37462930793644134, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6297969107438809, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.45236333724230443, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6557435747309683, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.37570809340937233, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6339141734561076, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.559332422592187, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.733291190094771, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.48457382450313924, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.7144409873446065, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4881942815467274, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6662053431593723, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4577386766301953, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6531477300791018, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5216581079910853, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.71892039370478, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.3977038258772401, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6202897864314184, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5071615322885569, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6902107811249504, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5071615322885569, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6902107811249504, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5024073848733999, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6805608953669952, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5105445285326566, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.7006788698374967, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4375567656377118, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6314357170099009, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5531306492249056, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.7508908077947477, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.48342250090939853, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6678984764077726, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4198435178617755, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6615330486958915, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4423392581565186, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.663370348519268, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4857904515144913, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6855885711252954, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4650185344284313, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6620634442558946, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.42298863290550076, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6260112466527037, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.422714343026006, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6266965858252854, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3446592076818278, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5819912583909785, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.23270938096152352, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4490269267329941, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.11634468327243708, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.1766119944524977, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3986479587107995, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.36572840798667283, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5794132414395217, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2729012183957552, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4275664216118961, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3167585643537871, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5076869840147092, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.1485909701386744, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.37920460252872057, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.14913158540833668, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3661331120085608, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3292454551002283, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2817018331209101, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5332946673019744, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.10377361777079974, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3387746188862388, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.09073980329024364, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.4022599112382079, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.6108631094489827, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.25751023494151143, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4697665795408892, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3212983212315964, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5599573621112933, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.40185909552300847, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.6025771204693394, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.08981148917655501, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.2560040742784669, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.09236883467211593, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3472719365557752, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.529527758323629, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6540432510655854, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.49704232910799745, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6453248294274054, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.49704232910799745, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6453248294274054, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5494410974163585, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6853937472090788, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.464523041040441, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6240758263844317, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5201554059039623, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6912283534522488, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.44114781827798216, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6241365710582877, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5525047617046378, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6971891434573309, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5525047617046378, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6971891434573309, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4286794450695727, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6365023289177463, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.47406277913160133, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6744300843116897, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.35313312546871106, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5770478822626409, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.48149733895714797, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6355577992154319, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5280706683356512, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7101124584086957, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4744991305294048, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6720481841701565, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4946489712934811, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6635756951391838, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4946489712934811, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6635756951391838, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.499920808370058, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6982198148797377, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.32345422777393923, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5652905380017423, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.473424955479643, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6791725069180572, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3479698393875884, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5760833125751785, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.24373253714463095, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.49482039214573803, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.312050635062637, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5390444512132623, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.24229889794871173, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4853505495636382, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.27597105261790716, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5468536896755547, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.25483341226864464, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4565768361133673, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3542266508664836, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5643413028542406, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0798787470647272, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3626462425183916, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.11365874698773852, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.35898391291374204, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.18282456123768265, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.47540661243586124, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3634148417932591, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.606148757613633, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.19912607659637732, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.40793214264643957, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3721154325198309, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6214976185877734, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.2998257682809658, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5433673039371607, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.2921982022041547, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5264166199754001, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3142825719425009, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.584353897647861, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3721154325198309, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6246670078758891, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.06077234009981252, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3407473281004383, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.17473028966988555, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.400425072418037, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.2988697040013311, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5442522660489195, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.500703635659656, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6501904887399698, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.4876463179677598, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6113405963585182, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5199813503697857, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6584629522606407, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.47431965025538014, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6291489876001155, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.4767709962267032, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6259826462063701, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.3995439803178399, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6021193793256325, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5428215191518801, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6623001913762342, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5428215191518801, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6623001913762342, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.40656183899584336, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5890799945028116, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5703676203923652, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6860261633054247, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.41482837856520677, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6168104640783212, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.49952715015218047, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6422528647342378, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5142542955490234, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6424880376323772, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.528547004876945, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6459593469343872, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.45002572171222577, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.620458099259989, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5050700246992413, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6475387594613298, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.3614492694100171, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5766912416178624, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.27875207406965286, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5095968928696253, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5576102993622991, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6640761861237344, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.43330223254789785, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5564499529933307, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.1466607445607986, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.36552963821230766, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.20527494029659898, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.43586475049009993, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3047577636054668, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.48318512703629857, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.2580795409309559, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.43914026663016836, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.16510240061590087, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.37927849234648453, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.2993081268625724, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.47777429598730525, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.08164832509153802, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3152419985155113, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.07930561604985777, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3127076147872426, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.12340057804403023, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3331532512757645, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.2533658570671817, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4526653760708659, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.08576088673755342, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.32805247144500205, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.469516870711376, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.6115882471919187, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.4488093387952295, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5788970821297579, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3196191720459511, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4758634857690128, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3095674062940522, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.49847201920427264, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.1670991646237069, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4262476443999306, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.07802167298296538, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.31681677382456264, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.08748671768279999, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.01250047619586174, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.12383271014582256, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.41807822202441103, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.577545891208518, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3475258894340562, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5729813197277963, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.4610791064938662, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5982792041883009, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.4642320266834861, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6555816107178817, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.0007348618459729571, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.38823222788076894, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5684042820738361, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.35312894221988256, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5483853808672988, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.45147878499907884, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5848017728755368, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5084875082100132, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6086126309449248, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5148124488217735, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6443063241500353, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3731480983940342, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5483607294813455, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.19545643680451924, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.45542992428164836, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.6009332869110189, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.7201156007131091, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.42201842197924766, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5864023472716742, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.6427952895393818, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.7611062226622591, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5908781325191875, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.7135251491959551, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.4051294289366985, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6289236919386905, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.4267442071795165, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5873661332463408, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3202249300680136, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5240877863757325, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3021474642420404, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.49488718577709084, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.20238796310390209, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5665467522687606, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.4612469192468151, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.689540484203802, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.4688149931788708, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6935921004770637, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.31907001507985117, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6396787125445289, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.42575254909588905, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6704469753084539, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.2589941364530944, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5932208556546101, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.3473313422920779, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6495220842154038, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.44311394435253, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6857046279564323, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.44046108667071515, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.685227653600115, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.15605718228191343, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.48559902973042135, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.1862321333812371, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5434259794186684, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.12928195437567142, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.4539578151160054, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.4537382610431785, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6984640606230582, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.5813201988276469, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7732878388618204, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.48361343491637904, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7036055457806847, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.38248883198762607, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6879676788132258, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.4685867885254208, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7063293525177913, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.21611447762886524, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5783999378427704, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.13828520206051664, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.1943642066325126, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5437845506934148, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.45718638941364104, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6765316874457515, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.2842437601270078, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.616554183160495, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.2790392444035496, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.61628786229813, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.1969296745448077, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5972281991369082, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3108511413762713, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6249458527250515, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.33159620794115396, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6420436747364323, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3319181496626261, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6296213700542458, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3644356127641462, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6333838791879324, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3644356127641462, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6333838791879324, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3239007562192641, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.632048088218684, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.36455043938934395, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6460222600356587, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.2330073650181948, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5651770800376458, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3728937115586142, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6577808113928253, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3859055776419563, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6651384368454005, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3668099116926436, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6472192230833502, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.2912353795089198, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6107975281071784, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.2912353795089198, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6107975281071784, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.21326712180309276, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.563678179720206, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.2493518765085485, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5378176060849199, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3199241796546606, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6151079711025308, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.6260866791475674, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7122695616091047, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.3856140451435003, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5942230347389459, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.47809357926196877, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6515239202890919, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.45010740912530395, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6258219368613708, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.4729242406289264, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6516452341616961, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.4876819889238188, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6502930111654278, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.5145500336945869, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6710712793486331, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.44650626058382614, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6393035001487706, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.44650626058382614, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6393035001487706, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.6152720493266995, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7135410085152943, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.5631686113493045, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7184482394204479, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.4545770466847879, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.618156071536431, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.60059205595428, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7065044125404809, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.5848425044978098, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6948793399158286, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.6505785903453039, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7498687328904513, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.5099789058566842, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6591589669208932, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.5274166051698492, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6781556869000741, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.32481728488530576, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5086455217662933, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.5609895346624986, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.681370247009325, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.025127088788317715, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.21031980892802613, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.14721260533033206, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.2721389124032325, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.31315822356102974, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.37596627611906025, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.16777402882140335, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.2760884586584135, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.2504706648057762, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3683837339255408, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.2500271340971054, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3785374220544885, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.29427156769985635, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.38674923884011136, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.292199920758876, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.4387549485056524, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.29776813591532686, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.43498932645957955, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.2923799210557074, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.37275082068184157, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.12967217202146777, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.2760408327025785, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.18371750884517982, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.25880501169420894, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.3995850754493543, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5087563569873156, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.06706674495634783, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.2609736645923168, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.28256258503905557, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.4403352184603733, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.08149835562288019, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.22869808598813696, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.37305234678529364, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5149547593330109, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.4188415744726475, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.4486815117132592, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.12394169238067233, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.19833625372206998, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.17671397515361123, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.2956531697676844, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.23333094197299464, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.49932033002402926, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.11159818222678333, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.4729286927154353, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.07043017228845555, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.4191306757394529, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.17011874208765, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.42603202246343985, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.11589839803393029, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.4642618457479359, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.392011058805531, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.4224062856802975, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0560612491216656, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.29667884967175284, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.13616753594738606, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.05059530408535041, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.3965392443405426, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.12656665377079704, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.4038727423970976, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.07695254102714547, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.3675548828619689, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.22438996279857726, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.510666007287132, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.204118864406014, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.5209095484632319, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.17273369377646117, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.4962992066513333, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.40336122739639907, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.3514306358044619, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.11253965279954393, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.3312806076267322, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4060492501139825, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5683580956250115, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.24406315755131383, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4895502947549771, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.18506675761729544, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.33180904892662333, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.10878690932666137, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.2847045760566851, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.33882318194431066, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5047311452330864, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.252348200648097, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.44101744642019985, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3095017641836787, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5048955319030237, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.11733446372903365, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3700951716975203, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.06040259454298557, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.18644218459633172, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.1787841699697072, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4302346068514662, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3896656593774665, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5661203320687994, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.14467877387944345, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.41250682657762056, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.38202772639517846, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5663085006069273, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3422256853930671, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.572244640299745, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.42152658422854977, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6140840595084299, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.18829060751796134, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4750608803591822, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.34709531432802904, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.512413746971933, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.08136547129100896, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.36756815874876, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.008152391697655444, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.12885984770237888, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.08482964569751385, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.37059552449706873, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.07640690432316208, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.4641021856152639, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.16305626357596484, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.4931017709619054, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.100607017773566, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.4914538834431842, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.12123898135089006, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.49565291998466804, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.1989354390390064, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.5787262395513628, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.2282403145147831, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.5807113513194252, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.3534880761230573, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.631198012145244, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.1846217208650883, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.5452524689405125, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.1846217208650883, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.5452524689405125, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.06829403401932822, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.1478699652896728, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.44530449583576903, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.31494693194199885, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.13377365982495612, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.5270242775913284, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.24729575836552034, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.5673652711315395, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.10737595646578652, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.4590576572286579, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.22617570410381801, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.568153735908772, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.29201611245206655, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.586127589003777, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.16100867171758365, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.525256386579363, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.3548850735136589, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.17868791441337914, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.49233505722359083, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4264281995893693, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5686715593689737, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3755133068349505, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5350958966397039, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3604013045286538, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5240948375874741, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.42380027373907764, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5825787222593419, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.35896346088508435, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5234247995922829, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4597295132616795, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5915321092794668, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.1806974226541495, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.34826902612300276, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5236428393720809, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.34826902612300276, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5236428393720809, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3896357749382976, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5711687057303002, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.39901184766876213, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5638068824363435, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.08308865150118551, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4154783517972104, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5523079443545633, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4560256553756281, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5819378877070078, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.36146694802217977, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5217871951951657, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.40621309956458085, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5705677721515976, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.37857007405534787, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5432976559190418, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.41974022583187615, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5789182838893838, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.16851048608687724, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.39269226415104036, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.35193239373450247, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5556135973396488, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.48773160239847224, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.6806798635651312, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.30528565543580843, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5318320483880432, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.42564879277646966, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5834916199322795, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.2816107594296114, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.47708260904446365, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.2595156374159681, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5213735061343449, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.2529362972744106, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.47633013346678205, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.23876161112129105, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5060111681210002, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.10996317447417582, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4562725637074783, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.06283136506124469, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.35716545980725684, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.23207681151022577, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5542533872428013, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.3681328025423664, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.558946231881744, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.1610181647616454, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.44401980465688484, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.4488857379742467, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.6217229021975876, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.37214272882689936, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5776707488991393, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.3685432686484428, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.6081703722373271, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.372741651699079, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5562538159679055, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.5270835405692063, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.6545175152961233, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.1463099542325129, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.48200454621829203, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.013528602079016893, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.18394986790099302, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.267738825901029, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.558747920793312, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.09056338921321325, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.35831291876413535, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5383434353225599, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.21309892072457706, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4807665621271328, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.2751568390064031, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5062735871956761, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.13878568111175027, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.31242003815205044, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5563618145874679, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.33552713250034905, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5622995574132048, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.1350251422473041, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.13187231723607926, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.2453793854893252, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.49293512322505023, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3340915684104956, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5881616164627295, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.2231805576223107, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.48399526463420434, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.13831234995224262, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.37353627859030286, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.6199557289504044, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.13044102074288644, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.43717039423668463, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.664436310127455, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.5116998504087426, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.6764649727822265, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.14133527729676643, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4869689368209618, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.14885037234207188, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3683017467934542, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.13910441643722132, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.36925904697255574, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7332676622154629, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.22701436815032078, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6596565788920288, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.21750412168041985, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6503148632624418, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.1255606329059252, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5394589084862519, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3365506763118682, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7035699246803807, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.19743714411828858, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6726680111377844, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.29353984288388507, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.695138133719427, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.1859329553189182, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6312651889606834, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.19698875496087567, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6426381621698721, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.12502549517250966, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5134519176715009, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.13078499079113903, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5671925386952724, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.16114368592099126, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.31381150232540356, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6729359211590943, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2762254293612262, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6512210577651574, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2485715475529631, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6619189737616092, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2443332858855278, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5746305891359669, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.28631922426989914, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6564670370777995, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.09645378139499047, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5692397577624126, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.46092365794867524, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2532325989817117, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6340502380249944, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.18842393723950338, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.242345930892648, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.14033475286594138, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.2185459072776493, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.15440513664352318, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.20094346456661977, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.19947388010084352, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.20584516821270868, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.1643888714136299, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.1552450511464302, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.16896348843364614, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.18418484596876458, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.21460501386434508, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.24916141637142014, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.11335626239247124, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.1938403364799882, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.12987138545874835, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.18852965533555474, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.29074805324424113, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.3256819925596258, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.2941832965408921, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.29022301417720925, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.21156470995540266, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.3764522333698161, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.3811591130797367, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.28023341443117344, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.305113560344023, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.19368866426178633, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.25076687061996783, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.11917829382682127, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.16122680340356227, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.09158971449230888, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.1604651736249577, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.21475571940851748, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.23332152661609917, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.19089930432372385, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.25962807098290386, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.2262772028110626, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.28390860213680813, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.2533922732210082, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2943322308733634, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.20407692159771756, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2928742362146829, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.23258136002668917, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2968944499287726, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.21966878190344116, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.28531911189599546, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.2385000210355777, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.3189153081904016, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.23325505861671608, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.32216634867002286, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.22433948264456524, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.282183022851947, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.156393512642663, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.24731090719513005, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.12860612501976387, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.22143675653609388, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.2221235626911707, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2906383281974251, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.08220959497313386, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.18242471007512046, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.2078787665206437, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2756656981575715, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.2206771360207329, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2933989965217572, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.2286976241275377, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.29912179287036944, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.2509281473780815, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.3278628963365103, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.17151210435282105, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.21075293387410687, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2893821949552371, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.24780611716850762, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.47134154774689047, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.22071227044983457, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.45625358803874827, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.18572499842104812, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.44634283332460656, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.16045962097781283, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.39926109862673814, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.21696202869083803, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.450726878878052, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.21671712256611847, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.4510971285634406, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.19068201647342703, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.47264292072872943, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.18402221306466232, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.46311339911247473, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.18402221306466232, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.46311339911247473, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.1832966984886601, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.4594348688263129, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.21671712256611847, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.4673219674304703, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.12256777245297515, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.38884595455998683, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.21696202869083803, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.4542389365828182, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.21816681623776085, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.46537342956568817, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.22295499304150054, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.47766237893678676, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.22295499304150054, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.48230017737333225, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.21816681623776085, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.468844756103637, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.19068201647342703, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.4764825215004141, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.052213612047483024, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.3400818524793406, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.17037567751599617, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.44610427748565656, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.37051721129462284, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6498104569938186, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2580007063666063, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5822544210906975, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.1726118391167977, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5023852140556049, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2359703193429221, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5552647086717654, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2629904892589347, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6085636416475416, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.1862014063118161, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.560745680895326, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.3786316249953693, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6635410443202763, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.21961147134282902, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5737658144410308, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.21415796439643678, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5842832527331845, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.19729180187303783, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5692505097947306, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.20558970366346235, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5814353567611998, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.14240979097995507, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.4730976331957879, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.1949122052514416, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.562469018867901, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.25886046887579395, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6034516113607118, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.34133265816725017, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6366815217770301, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.3800346950970518, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6452882639363281, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.3783339819364715, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6593521342223361, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.26886359928085424, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5762551633274934, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0698131466480269, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.4083625991769138, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2174421310712539, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5984930470698431, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.10361823626504964, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.4310581309821413, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.2230178035253426, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5101738085806462, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.2022608472298022, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.48309607214155004, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.2937928216821887, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5533712218959137, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.434861015152756, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.6160254003525064, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.4275132190811836, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.6183948028416776, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.3384715911287291, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5853748388768727, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.27076744469211, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.49404709300383315, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.27076744469211, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.49404709300383315, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.2270708174569185, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.48624546816788206, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.21286864758348062, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5101291767382903, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.21287094606484727, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.509766683670224, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.19889636825142532, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.4442470563066048, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.30945224546521716, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5630983761409516, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.23181745459940442, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.4951093505397665, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.24038870654195743, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.49916550876761034, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.38303366784174964, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5620304430819654, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.21123963087126463, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5078608171293139, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.21286864758348062, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5101291767382903, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.32524223671805436, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5722293206942193, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.36794933674561564, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.5158310722885024, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.11504003871888992, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.34098902843025203, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.14417769394396596, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.35807445053364595, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.08176715535784833, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.3376841807558391, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.2774818103451689, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.44113079826164325, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.21111088301177083, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.40279886474527266, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.09986058288520257, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.36871570224848776, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.16631693106339326, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.39159831614417295, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.06964145711431875, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.32491647022155745, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.07276433052402366, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.32116136596381284, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.23308174848162824, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.22218552068256323, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.4450748679632032, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.2702057908347154, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.4678766241841472, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.17699163364196366, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.41857874045442445, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.09553437121825695, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.3595876505396123, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.3118241997140786, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.12450545657451231, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.34797690177486457, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.045901997129390526, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.1702227105083055, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.39357471857893617, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.40551649928110445, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.5930648846584049, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.26393609309752497, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.537555561645112, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.3303117725880122, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.560168275325771, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.17503312584389935, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.43133110833149213, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.43900485344233137, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.6549871625186566, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.37906205367803575, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.5725673261037826, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.36601964108639373, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.5845337385286451, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.36455710643820927, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.514430107863874, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.36455710643820927, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.514430107863874, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.1812076974459594, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.4487884876740554, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.1883530449151216, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.43047232893800663, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.23572045080741244, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.46564299111426677, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.46092486073366884, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.6626661413371057, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.4174698823555261, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.650238295834434, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.5387442820481836, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.6904491113602272, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.4282802115951284, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.6473023220986025, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.4526263247089748, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.6652966172067265, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.0703435502132088, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.3516259272346158, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.06672319655621395, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.2955579944734376, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.23168344117683473, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.5068903585027874, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.17929000882953408, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4496535426738611, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.16236014331815277, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.48989515178016146, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.15735842874768727, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.46857817235250293, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.18862094754879255, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.45176760894555584, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.09202515283226734, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4638593627310442, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.1330961608942028, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.45025136594118575, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.08722778492512923, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3364641574903517, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.1476568104717575, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3849224616617969, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3775112600948674, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.11864063947615534, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4500029014001494, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.18688320391897995, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5004399871729917, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.051728432773177326, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3142433454993249, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.24721511426925932, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5442406939070432, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.271853263303256, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5585854392009959, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.2959455734694776, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5713837604330028, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.21688053959403902, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5062525360225847, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.2073783805337231, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4984632492146573, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.32520872522373195, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.2861595664904719, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.12357944977207799, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4078633867106628, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.12507294238386796, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.4138228238855707, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.09662080155533485, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.4004063786929589, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.12582532774374633, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.43332988875216105, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.10101514465264909, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.34334971796824104, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.34499321757027035, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.05192269447013607, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.31860591509332514, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.07838231858034365, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.3561023397717499, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.035585322625341406, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.2801274742887161, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.04543034413119372, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.3455689507412121, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0048954756112974925, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.10879120802014058, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.10369660120128882, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.332527913188454, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.05487814270156122, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.2827591580294265, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.1839647698105875, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.4671696314237755, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.18565720303394231, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.4755242091997159, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.16231907283663688, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.461275557483938, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.15101262144193833, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.43087539057782914, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.16933060149572027, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.44273639204747295, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.3134098737361091, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.06799675342315524, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.09997603846232973, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.34271284178793127, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.31930298094188314, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.18843423878971213, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.406619208342576, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.19081286150787086, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.39113510970460724, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.20120733641434646, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.4077641022795532, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.08237593849286892, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.39211133589483876, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.34044840921931474, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.30952716928360685, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.050197821982229, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.37264360189446266, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.28971150857646244, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.13891712410260026, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.042989735393950027, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.30351385376698004, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.1200764997254734, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.584077802245465, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.718852356264379, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.525804146413243, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.7156057074240233, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.08851034303538526, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.35420441276966086, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.14180177701637703, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.44423269039394, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.3561981735709292, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.5123206368687799, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.06855125388880344, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.319498334710369, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.028396850905583346, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.22336951391800522, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.4020788409904052, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.28599643357892673, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5040359554493156, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.31290644634463044, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.48183974715675615, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.2929916734973674, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.48088316152599886, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.3120625543688931, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.49617263318482635, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.3648098913192682, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5125064136233441, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.3117704018047124, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.49192558483985005, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.37246426676247424, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5209584150532849, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.3262468941551367, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.48586000819359126, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.3262468941551367, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.48586000819359126, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.3725023982012085, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5122161929575203, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.3669823180697858, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5161342210942241, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.15122975716794776, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.38379689700174613, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5441603162109487, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.3683384333457243, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.535354195733138, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.3747706572497648, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5209117920230769, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.30492393156823283, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.4954261476108242, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.364284960767485, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5223164970771063, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.24024632160930773, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.4527249312450521, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.1031071354905375, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.40879569940654853, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.15720527174368745, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.375220686806908, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.19598591795373435, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.4825413460017427, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.09490574585599613, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.38652552310013205, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.05866055921523065, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.3771978909827911, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.06799395054035348, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.4163550141836907, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.2240898502507552, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.5415491600496339, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.12258998275054492, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.5014302475974287, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.15984232622995367, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.47596521065582126, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.05266885854027921, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.3551300147357464, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.05719178035180127, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.3600549294506989, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.10585600619046985, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.07847318517164766, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.13851721298930605, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.27645966460351457, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.5766041891106223, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.11651389750816872, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.47506935582480836, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.10468075025780492, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.4004016785171292, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.29817955693860293, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.5420613611826939, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.2999813280866963, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.5482034617789758, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.2717321001858494, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.038280866664809454, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.06731462154808254, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.39476633322320287, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.3033134628026753, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.4755698162289715, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.012770470304307417, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.107571889368401, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.23618922547203738, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.3941806817944268, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.08589449352149726, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.27606498698754317, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.2090466302287333, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.3818993801801359, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.18134669659602926, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.41766733780408843, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.1536201950485081, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.39433169171016624, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.08333580646188825, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.07595651689900781, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.06962441525660756, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.0035612535612535618, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.0025237297858549763, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.47556050119224164, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.6011398608200079, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.3048688483989491, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.4231107190425069, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.15204349869646772, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.35443548276779535, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.2582643227161482, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.4622766760642943, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.2130807529115382, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.4585809667123008, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.12498291516147136, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.12312267504265355, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.29443275863324925, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7017829861193574, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7743327021667388, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6961795371760597, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7859480663394858, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5643442092080923, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7005543453411931, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5432312750246535, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6677259864784132, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7108042922925358, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.8180084374898081, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6021903435236307, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7189230568243182, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6837528314895732, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7968789890147058, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5810419951447446, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7223623051132819, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5810419951447446, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7223623051132819, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4391684160269219, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6371098202414471, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4063427008301977, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6422753361781052, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5124564255504712, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6584075128969283, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7191192065120268, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7900451518124424, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6457054670434149, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7698170823293852, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7555875294328935, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.8049022687045564, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6443411340522405, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7335999563315522, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6178921921090218, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7038448765193518, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5127683744264765, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6392337103594719, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.2285369650225378, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.4750387664265888, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5975003598259766, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7683913390959731, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.14118350058219528, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.20431837779877604, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1811004938014804, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2649993136544717, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.14089011087858522, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.21944603811527294, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.12501819027374758, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.2634990040622181, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3107963896402511, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.1887796230243076, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.2360941227140328, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.35939098278145853, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1700831186979049, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.27468331002901497, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1700831186979049, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.27468331002901497, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1273192735797341, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.22231961416584312, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.12807695305878491, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2077034261476462, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.11380706349927083, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.18741202960919692, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3487145358887869, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.48564425156445185, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.16283600677994822, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.27435920896036364, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3360376952328008, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.35297640449956286, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.21687218788036394, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.35659125027777805, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.2652905482508091, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3682977696924544, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.09554138026891966, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.1017839169529136, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.14549060082020032, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.22054620758680943, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.20721924345714232, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.36475932190367044, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.11386607947762988, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.33564583347921473, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.16862356321891248, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3637462812267946, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.1485432117087218, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.27594605375708126, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4266832189474551, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.12106878595868109, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3192454012719998, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.23649053182388327, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4127382174759535, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.1414434097479761, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.1392050630226846, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2392792151449317, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.388678103641788, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.22356252878437638, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3697725657006299, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.15789014160632847, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.1269046173814566, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3259738911297118, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4954841706551886, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2971168748740452, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4617842175733257, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.34101364633474157, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5758572581135913, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2026639468552004, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4471011187469559, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2826834682529912, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.45013583426187376, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.23193857266744913, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.39408808540478185, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.11622323415479685, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.10826694406224016, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.180048782148418, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3772586334343914, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.46442643702863534, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5519480629125156, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.6268941789647348, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6958291103494518, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4554740717077828, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5498766350188072, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.35817810808590844, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5012707040525209, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.6297960258710876, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7117676662366008, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.6297960258710876, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7117676662366008, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.570135897056151, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6801332690579707, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.48202275489653057, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6179256582013561, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.48202275489653057, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6179256582013561, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.349335635815966, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.4827709277987172, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.31654831990661464, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.49081221368735334, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3145581711998323, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.46884369852228663, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3947812939950854, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5373384617862703, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.570135897056151, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6801332690579707, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4692880637764782, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5773610754678101, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.5796814083647206, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6364369549208913, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4632588853163901, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5787305109374016, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.44382744118389234, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5238682600368207, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.2985280444159845, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5072627289039213, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.49402195020645817, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.583821485566765, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.22837680015088951, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.44164180234500505, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4151474543103342, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.638952468710771, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4355097603079957, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6511365998081735, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.2468185992183292, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.46792167630295967, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4295348440736816, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6231131340408064, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.37475590201160436, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.549178868228435, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.21305368975019265, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4371748197696026, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.15170197126996135, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.38724812547353094, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.16829349191968618, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.45156426303210995, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.08919951949408464, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.2986174009048306, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.1410465419512601, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.3956605149834367, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.10206140509578326, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4176763688729275, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6124700716856484, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4321664312116736, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6410028391476785, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.38791552573256816, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5723637874192081, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4305675865000082, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6312508299648723, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4305675865000082, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6312508299648723, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.004789272030651341, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.011973456545827533, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.3002149853465536, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5378189160780977, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.11634129390828839, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.31530902302000635, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.19544795798162903, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3835451743665027, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.19889333501994313, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3885583772632557, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.24480102898506534, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.1768354493171793, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.36843725720782866, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.2623806581920467, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.20876900081884944, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3981381071356935, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.21782367859117502, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.21355445282160365, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.1582263258709324, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.09477657379758099, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.2985135749646211, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.1086652235986575, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.32118144231225637, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3184011333042053, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.475779589579386, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.18408707168815167, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.39359335046927674, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3530704022752377, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.521530381948501, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.21574854574751035, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.41940323708656974, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.33615439462874475, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5097369506167734, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.1441526877675797, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.058854097785805734, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.17240019222052141, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.23425891587078498, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.359355103997122, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5589602235417395, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4267520229161, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5518115366540288, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4248870612387681, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5641041633033193, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.40429429626811253, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.575799986766, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.45381532807317354, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5943177066932069, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3877240689639599, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5570859361697285, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.37917766663411384, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5365794450039074, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3166333840532722, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.48240256356216227, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3166333840532722, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.48240256356216227, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.23329145933277767, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.44291475401588093, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.24227648418732528, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.46146827574639326, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.167208802961431, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3830062023934698, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.40977628070530747, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5623921018498135, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5197186862359199, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.638763803741213, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.30626379803308257, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5077543267123376, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.38861707449775285, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5432656354167995, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4014604107110108, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.555936399533357, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.30924707690051173, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4978251800547365, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.17706333085447226, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4047932836379997, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.30904104300309865, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.48677056338263186, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.35551034193127495, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5627284645723449, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.5039752490702457, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.613669501327356, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.36932295883897953, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5524455184773474, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.080331199191236, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.23021641289829473, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.328497880819844, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4983088156516304, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3297358210077752, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.506224157487821, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3221305290185444, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4866081657424789, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.05537847775470663, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.2899958464274292, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.05537847775470663, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.2899958464274292, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.10563809356628297, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.2323275601638909, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.07545740243040912, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.20719964403553334, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.07954107728679877, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.2990326103210471, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4592557039164775, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6121399518555048, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4926928484742955, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6783058705579786, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.37818447598700816, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5516941276443429, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.34591973979258805, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5197016245837053, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3016434352360353, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4694223786765916, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.11220790698385363, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3598119301621503, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.10020997712284248, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.16327778043310373, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.24470192769722524, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4583472827584427, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5088535943352446, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.625202596789752, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.562048819850726, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7192054483864224, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5550041554031738, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6554946147279708, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.4406896260480816, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.571328063702761, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5530531361065767, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7562385559929105, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6077585258730265, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7400152006566423, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6260375038358343, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7803415401430737, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5078162248208812, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6905875293089103, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5078162248208812, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6905875293089103, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.2961648173595504, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5022745285039809, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.42144206716572813, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6448871869422788, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.29731554668582794, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.4888827115549278, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5803184114968359, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.759870828515734, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6359088668046915, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7818086763650907, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.37446819995007063, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5965995710194948, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.47237086893932345, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6521003933528818, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.47237086893932345, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6521003933528818, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.4311779921348334, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6057676750393428, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.31867018346252723, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5261433842307197, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.709255033821849, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3941975148525721, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5191046479503385, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.262633940062176, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.41923206553744197, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3850172427136058, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5264633431241114, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.15363234192450648, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.2697691812908914, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4379048206045949, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.251174652769276, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3146726146646545, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4709531555683, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.15413963411151027, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.15413963411151027, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.13978782442553714, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.15182422135400298, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.07543938388328403, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.1621827388698608, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.1898569805320716, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3803223837566382, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3000233133055524, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4363867386432795, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3223419048219805, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5205977846006183, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.1764046491640527, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3464061249457313, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.15503233293760701, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.08435396018831114, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.2005359551536709, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.18856799944599728, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.20466701735848536, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3025868321081519, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.2618161850312308, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.46946589430056646, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.4395347891601966, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6124294442602769, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.33711507396378565, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.568383173179082, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3222423455530638, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5643748237802169, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3260557929227487, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5464327822869444, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.40332947519159895, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6228862504867423, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.44898438516407524, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6143783254714975, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.2512754000899554, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.48284061818613055, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.30916859703841926, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5135112084289326, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3688091032179454, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5618587771651018, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3586319156181016, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5648853941150953, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.09472381928564721, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.11154711865013102, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.43155890347066467, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.619009447565164, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3065593460722296, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5311466716037293, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.4918904748281632, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6551168488426827, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.1456085160245154, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.4152567008092657, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.4895484390664816, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6562187218599307, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.33823684808781096, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.552151899411627, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.30284835181827113, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3991070933698779, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5925781167136664, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.13139413594401378, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.2490406851204271, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.3112317271723676, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.4361597730424806, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.36717349445307196, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.4938275160496472, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.32679491753274487, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5061357551531296, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.3784649355444829, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.47122443560931077, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.3958704329397872, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5196505496421775, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.4850978822371748, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6352541213631081, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.43779970215988684, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5678763862130168, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.43779970215988684, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5678763862130168, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.1597896899620504, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.49086962788909555, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.29860454922343194, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.4455663313013989, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.19050288241472282, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.3175856028332101, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.5117916534946495, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6878657354957926, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.537967690201565, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6655488114403648, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.5420104447784688, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6785083781968722, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.36033300506928556, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5012711439020835, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.3603103614264621, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.48642257026995295, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.1717511667481313, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.408433531395965, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.1975941306622024, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.27520597189594015, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.2983588344542972, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5041326432925124, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.39997687282627975, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5906362815628093, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.5208833700498166, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6368157603637512, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.5248317090186142, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6599904808886127, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3727105527986878, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5475770179024447, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.478854281434795, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5906564991706454, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.5354397296450966, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6627191681525589, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.49612267717096975, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6218353723304708, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3857808912695531, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5169273955057163, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3857808912695531, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5169273955057163, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.425433767253164, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5818873909634904, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.34959700947178757, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5356888229062392, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3818308290157094, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5561897012107097, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.674363352915248, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.7903753469468177, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.6000288091649221, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.742950065256247, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.5781534325005774, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.744306267545875, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.5809669142768361, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.7168714328499641, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.5248317090186142, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6414304239299259, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3976353643835253, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6037335600385767, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.21025696416672812, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.39080193524562357, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.43929751176084064, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5889147220911218, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.1743988338080954, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.4684683280769817, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.3335763231736967, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.4822714438205533, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.24628203589181794, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.47490747232624714, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.2865635502271963, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.43207538722163397, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.3737090685822519, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5762937357995392, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.1148153812028893, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.20549791555765032, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.3501847839621347, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5180344374850399, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.4105639021889742, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5404388902987453, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.4339080813601068, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5601377134953289, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.21195371406845798, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.4061072764676573, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.22094967664887016, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.42763903484460886, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.10289124253714341, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.20708037890207712, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.355480478065782, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5288950976571054, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.63483775562831, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6773728017430378, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.3483718729405163, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.4980927055323402, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.32488958976180393, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5442618574958996, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.3357171510229708, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5153790879430198, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.25281168697394946, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.42994528157525946, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.11781301843777481, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.20219186255257193, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.35111125142401484, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.47360088422177105, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.30485765641951534, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.4770499606054267, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.33469420519942356, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.40802446160905737, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.4747315561546192, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5626366882998202, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.13084917716157846, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.5354938906263675, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.635082221784033, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.44404782758976735, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5596520713104719, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.41618377742781326, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5456804815374756, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.2248526055752078, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3876111278076935, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.2248526055752078, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3876111278076935, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.23360210277843085, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3931394603219493, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.20737479056525865, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3654501418969675, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.057847968205097945, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.09239794083005971, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.2977932227461586, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.45438903925756086, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.46773223798622254, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.4647428083266434, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.22256734278223791, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3765773373060594, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.4575089222077589, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5374740662953226, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.38366559823668656, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5374740662953226, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.31142819692835494, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.45383015002100824, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.05780839041831641, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.09190080998528566, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.1704823945286264, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.33422996692133256, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.249036269104499, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.42116420214640826, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.46916497710648375, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.6210364770630794, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.35346737856844224, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.46419896019347673, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.05654743603405948, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.18840545240346762, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.49297132192603577, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.6254707347799899, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.4017730386568173, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.5748162160243933, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.2719326877457978, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.4943569700727416, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.2026214534196293, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.3813079901484506, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.19111765019965316, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.354899732054626, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.09775732366013794, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.20273747118892996, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.23026081920559804, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.46301227127595196, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.13065113298388567, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.21708673939419376, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.4570682779614497, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.604396796818713, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.38063074910847605, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.5580442930261992, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.46418492741290446, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.6061288560408914, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.18865524528994837, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.43910447154292387, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.3187378316273493, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.4900083864782776, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.2431364795864718, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.44603256525262225, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.11367986929580312, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.2241654517232526, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.2786526099901128, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.4227880339868615, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.17361047672608262, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.39377589303767235, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.12646071698454284, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.35399465579414496, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.12760083087161012, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.34769412669125294, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.1006991958878112, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.17451411450550786, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.188452798788253, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4226648432088132, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.1696179558586946, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.34732496665446766, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.1513162576311821, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3665181321185458, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.217376398756242, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.24996060380547028, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.217376398756242, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.24996060380547028, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.10731205149587686, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.1863606234709251, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.09474578264934634, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.17340039476868216, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.029780953159454848, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.17166852645504288, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4562694529579096, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.12110334922926674, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3845930521603701, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.1751636962208055, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4465249672703667, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.16213942897865444, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.42390959990523525, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.12471175243586738, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3393879612099294, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.11913648728109437, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.33001537848010537, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.08856387214544856, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.1467534744121305, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.18599419313246654, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3092478316768669, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.5294442646627652, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.7281375072835307, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.933651069586263, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.9586507529693243, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.933651069586263, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.9586507529693243, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.6337520241233826, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.7734740773636255, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.7096224667917136, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.8862932371217843, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.7409995286953545, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.9017022389242945, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.7096224667917136, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.8862932371217843, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.882190724997149, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.943123392401343, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.39793093873955576, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.6648788692343665, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.7147882714185101, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.8408773556139596, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.3359230828063256, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.573086119969458, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.7106361351765512, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.8390104107504974, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.7770554539970614, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.9260678176630538, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.8767740197085658, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.9055624233154097, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.8627586293513119, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.8964369716535558, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.9082489095559809, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.9677853954871374, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.49475425785336474, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5906548177852229, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.509958011324736, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6038542862803142, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.6369088066655332, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.7248557877146983, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.594816243433451, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6684517153270552, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.43563519746741536, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.562127347302261, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4178102248624072, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5541365812180158, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.5425651337252639, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6573851660329229, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4378498065475581, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5570867906360765, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.516681983326779, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.645063041588047, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4504787313808907, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5879031108750592, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4231763152174624, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5646316458317165, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3242957438175126, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.44238355495975834, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.5808022163810818, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.662799816502246, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.5847143752213874, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.667787009741639, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.5267859045712694, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6664055100184414, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.5090592077494646, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6898128904382637, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.5494728224972747, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6505680718898761, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.36981912815060275, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5379681557823732, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.19294507404030448, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.20343032850931325, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.5464960726645379, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6338335574700596, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.29458137881791246, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5492390632020873, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.154638418688186, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.38427876360308916, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.23665785333710115, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.38562451484470484, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.3285020660210441, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.23590666750948872, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.476513874713239, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.11918572021577575, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.42346176272501185, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.18467608126336754, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.46127619811207604, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.07498804358507438, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.3224295470390459, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.07470955153973807, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.3255729228708695, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.1164634565396885, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4145923720314824, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.18799927787192594, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.1897174995868195, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.20303479872521724, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.499471239852535, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.2562052802049487, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4919666752433529, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.16382363658413054, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4680776467877379, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.10184169068088972, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.41112160104491985, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.1270469747619846, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.43900404845131774, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.27749477998326616, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.09802074842275396, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.2574156136422799, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.39153918186476716, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3067124925928069, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4473439946126318, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.2822493397640795, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.46507913833761805, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.41902736014327063, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.6029944827947636, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.17367203300247086, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.37144616993784235, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.2129735960909034, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4468251761341622, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.15654619435532813, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.33610131630894263, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.1379310072046867, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.32409635064062775, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.14502740485318713, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.14502740485318713, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.10645321323333119, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.10163680597032436, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.12096277958826776, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.09991777681092474, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.1480417518140296, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.37812113415438264, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.20983539712503216, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.42806652596319195, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.20341288916638242, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4150437746025893, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.19644453621831479, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4060267989033577, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.16666084560231476, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.37466968251155586, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.26352481475305845, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.42508029689256477, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.11916670082698816, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.16885523091496046, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.07336049608099318, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.292435357035135, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2753456537058715, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4157637470934354, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3360301095319906, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5015691430360719, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.32646109535361356, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.48381777748044785, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.27612097589294854, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4424892418298757, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.39819019648793813, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5022380875494022, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.30042999090790634, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.46930807317618495, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3925406773051543, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5373872822631721, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.28379653782470426, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.42758189890017045, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.28735436056367314, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.45643632651343036, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.29649973924791995, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.44586209023520257, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.27124587961619956, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4127186603775186, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2738616869466716, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4556817348961919, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4053606506369619, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.557889530040128, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3563341748486736, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.538853156704761, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.43788473639376585, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5875237951474706, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.34959513901099026, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.47278445023007715, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3448260128141926, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4979112231829977, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.16350681949708384, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.42520459556859463, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.19367943681771346, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.2703053606018132, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.27235655766695105, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.44776972516245306, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.1138894686892089, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.23532055968260987, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.08049386277412464, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.21735060795559732, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.2956525254279143, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.10413677522340241, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.12526164583036564, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.2842745976160273, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.20293026537059208, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.11502119605241674, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.2826072681332473, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.1944112173028356, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.1898118992024418, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.18905238632644572, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.20943523270839587, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.09252846219394611, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.36286179116082984, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.40065324863994456, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.1584528066157211, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.22740112117788341, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.30292463087874844, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.4012206532714064, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.20832178285518554, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.21681791150611493, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.15256354608052214, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.11808098081213741, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.18837255784699983, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.23882935646817824, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.31980378769803924, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.322315347128407, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.38629355287842254, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.24060799092585258, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.3958883558253347, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.12188389833426287, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.20788063249150548, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.31368770007573615, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.2623758746330441, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.35746220695051406, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.26005633892051505, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.32347988292946617, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.22289219397754548, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.3382837811314792, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.19194570483164375, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.3171637914304021, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.3391071440787826, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.4066486585212142, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.2791036570365081, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.34479412937297904, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.2897781370622369, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.3668617746176508, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.34599462014997945, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.42876949366685435, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.4817073354488476, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.5422050630644166, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.29222881654408056, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.3674990639831242, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.21592919809500394, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.3174152476126488, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.22282171488143487, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.3388318294292253, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.11123469780808024, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.15067692076736147, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.0356143901053565, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.4452444343846694, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.5114675834840228, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.23183307484609084, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.4860771998642055, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.291308139574652, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.47517712618294367, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.34695219571340735, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5253608470434756, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.22507903959381928, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.453032184620028, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.2968580868691674, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.523112681253668, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.3360386791401191, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5211853611973613, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.21675506796946695, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.44636018244046766, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.14568024282778402, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.4239962379021346, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.14568024282778402, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.4239962379021346, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.17432712705823583, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.4068910882512891, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.145362470339685, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.37199566857516175, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.13462826238682474, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.4060253335949082, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.21554182396690252, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.44338956524057765, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.39963516628793516, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5955868994654638, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.21382760075262391, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.4314761328726087, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.3047874430139783, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.48536344979680074, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.29755613285398774, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5046099633967354, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.2255936469724277, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.4396597890461904, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.0638707075030316, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.292437776821144, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.24544572260995884, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.45430693560049873, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.35933410857228176, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.48586232132101626, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2697896652026255, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.3998510332125744, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.24373510497535003, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.38614580002554555, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2866979519779752, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.43288926857590915, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.3354179041034966, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.4707359468602942, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2698452317086067, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.4582070027188063, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.3011454888332037, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.4262131523304905, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.23240224448374067, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.4133384318396084, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.23709961272723779, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.41517687550199384, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.22860205692285332, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.3182044233158764, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2621312438616883, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.3668283434236941, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2822052107603345, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.4198020225738802, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.32727015498952, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.45074320734250733, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2866246702095007, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.43367741049920755, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.32110411465020716, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.44807862406147775, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.27913282158047337, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.4260697288935173, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.32329508170352383, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.47345326558901635, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.24154624906796804, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.38272979449164946, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.1874127686625618, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.27663845749925386, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2593593634315739, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.43271674298316926, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.1300236096509615, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.32525153283102953, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.29431947865853453, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.44837466661278663, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.21254942319876402, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.4269647768540029, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.18418234612754833, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.39266182480545436, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.31289401059475375, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5097722113265488, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.1632548713391985, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.31655004838281875, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.22269084768108507, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.4278034123121377, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.17454130529882292, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.4080884742466854, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.16966449414791862, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.4003039896117128, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.14520318268031004, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.02961162523639311, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.14450400718516293, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.3304287005101634, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5073607253994155, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0943246418621213, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.34805553300777864, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.2351948770985025, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.38703230999414506, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.20519183872026955, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.45466354608754533, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.2134578088104912, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.3859086755695303, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.23713426834274276, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.036603349129414556, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.2175733312586878, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.445399235565606, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.10154990983620787, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.3241234761287709, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.07911223895726585, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.3284566286505505, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.31913963266610856, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.09628686982734429, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.1952154860241876, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.19130385778959316, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.38351236751442097, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.20061283670702684, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.3495758280859993, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.07998273930895511, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.2553179648394861, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.14764688052496655, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.3128469009417252, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.14500816720191645, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.3173242925215282, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.04405264390672397, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.15557624503843326, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.04074555031180568, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.1496552104956363, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.1484241683066219, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.11676877515408932, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.3390310485983043, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.1479505098524279, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.34511067058207273, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.1536989560019054, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.44762901138635575, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.19120817575042504, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.40969250228345677, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.17083517020083475, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.39150735535099584, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.06471498019604267, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.2882933467523094, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0646129461242914, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.15283455536341573, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.26314054882588755, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.2875779877950604, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.40209911650829044, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.3288944381277255, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.44408070821988965, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.4114907933318661, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.5085164164772458, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.2002896040873864, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.3388229412593097, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.36008600804379115, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.4853098816714262, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.20962095850094545, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.36777884239193975, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.3273734953951328, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.47406821010356615, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.284161300598023, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.4218648610976154, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.2807428198554027, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.4058440896280391, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.16680737576321863, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.2663384154922808, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.07882543593371204, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.0963282470591315, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.24259122722147283, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.3488665575390696, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.43223058781595636, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.5066667712634532, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.37877689433359335, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.4634054470510535, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.4740262814051875, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.5891073641767389, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.37972643305107756, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.5113424919950917, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.39706108097932113, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.5301051151452268, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.07569680446415558, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.0811511315198306, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.09606380022225328, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.3751172096292997, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.4914945427255551, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.28997498044330217, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.49213353233430807, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.14097078788452733, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4079828299257035, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.13141849076406048, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.33283942813493117, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.15295864444581941, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.2500778672893524, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.45494893323174035, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.14376690480628126, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.35572014853539113, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.20100292863011363, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4076276304952943, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.11078402685193336, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.30524045309162195, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.11463104968465199, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.2909861509746846, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.09390757917258814, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.21248157589242364, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.1569787205649601, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.16690910711509896, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.32737428538988683, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.413534200628771, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5332641003918315, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.21322100033563102, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.41366535344804717, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.1637342873856931, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3881970983070915, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.10502753986446337, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.36830254024142717, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.11742665343712573, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.36563345299992134, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.24610572619268933, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.05267625614923987, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.15350182288880984, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.13553404800740645, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3371278820535014, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.23102354810546433, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.4512153003070141, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.24080045866228475, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.08097181036179749, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.3053595249968307, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.288950858373021, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.12278113602323072, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.28569685113339227, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.09049008739913425, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.3899650893074265, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.1548512533490527, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.316352437998455, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.15710062654473528, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.18129439798301064, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.15382264097525464, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.13408198758732318, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.07839621745254671, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.1846437553302137, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.2622163181697703, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.4929088970575128, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.22431793519548218, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.39604227398105574, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.09160060296520073, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.22884503589536276, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.08241796975945151, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.3466608727751255, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.12134082961666683, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.2682674585843727, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.2373706275484742, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.13482603731036022, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.2697371983386055, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.1387209968695978, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.3380551337195283, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.13529314038135454, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.22343406175513267, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.18403324079901667, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.3179738040092695, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.1620137043520466, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.13304020859999266, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.27172344687178274, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.16488830630298643, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.10763639119133948, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.2286324938333245, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.21562480107796922, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.2122236502776767, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.031156779833853843, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.16646420617308932, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.14626163469852646, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.0023775558725630053, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.15631796991907554, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.4199470043317492, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.11647083703831308, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.362366272233946, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.07727363175597884, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.36985472300697503, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.14424504302299884, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.3278751518859767, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.26771553177100066, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.37949649974580374, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0651036528704352, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.1687003583856727, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.07132774105746421, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.06271293136511706, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.26651166138562676, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.3079971615386303, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.4820111161799533, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.28695849032593473, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.4998009181545032, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.23469484606770763, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.4172389094650108, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.3192819492326079, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.49617011044543163, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.25575140811008257, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.4498199143602961, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.38904911993741875, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5272334688781383, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.3366798594244885, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5032290575942354, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.24552764013707506, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.45959682937494845, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.2937994669861426, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.4721695544802295, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.28514520814329086, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.4757962294633225, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.2557023064185815, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.47341372742401866, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.31309768184871206, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.4792341787454904, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.37079823816873486, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5308246783220635, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.26184129808474554, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.45395719974451465, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.369512066317937, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5401956749669948, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.26656105817789677, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.46879180441648755, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.26656105817789677, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.46879180441648755, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.14794205573193786, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.36475095585467476, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.10174226364073938, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.2235019520504769, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.2648080421287714, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.4674196289628547, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.3264083012851069, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.46032850662611646, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.2593432509712634, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.4745626718142974, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.20160065184355866, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.5098889524317596, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.23981955551135148, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.4808731682191636, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.3308717914450874, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.5800788400879513, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.1666398736971546, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.5129077754414282, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.29614367235735506, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.6045301048147214, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.13471075888991113, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.28432329395160805, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.13471075888991113, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.28432329395160805, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.1432084252186392, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.28641726287478714, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.10515245723378963, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.21833110323753446, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.13817567422545388, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.26195281597782516, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.4449695860343058, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.6926365319460404, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.41135090225869647, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.6201370795994968, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.41083224778511057, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.6633295106886236, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.4113791261919629, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.6958747657915029, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.36285371273643874, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.6272308931328329, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.092853803518812, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.33364983045013846, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.10570785968335994, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.21198045114345473, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.11368320018193068, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.35732310520280736, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0832791679921855, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.2503889973495181, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.20368338136177952, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.3653296593218242, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.18108719755184957, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.332537347823435, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.15420338050642002, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.41396539477086286, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.5525295779105852, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.14658698509712184, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.3019087046443289, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.3199269653277632, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.46348573637808615, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.17066150633798174, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.17066150633798174, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.12255408397776268, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.12110468618322316, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.11216011726678803, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.15706190845355475, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.2065671630912018, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.38660038914038075, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.2960694463333958, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.4881747008412317, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.24966103324264236, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.1657794581694396, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.2349230687077016, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.4194990055964544, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.03494730820037606, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.08834844971867042, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.1986615588572577, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.08728152780371878, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.33571000196089057, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8761560783209453, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9484564543183253, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7505336182671021, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.8401910628269498, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8761560783209453, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9289416300153619, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4450050658086207, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7558874882119336, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8107492451395732, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.900032747778274, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.24688498672025874, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6300112897041039, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8107492451395732, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.900032747778274, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.4294674116452206, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.4294674116452206, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8761560783209453, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9484564543183253, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8761560783209453, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9289416300153619, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.41682189465797687, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.782362930596065, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8761560783209453, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9484564543183253, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8761560783209453, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9484564543183253, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7406375008540003, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9160988509714175, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8107492451395732, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.900032747778274, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.16195570128532405, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5142740245749214, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.3132252321342574, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.35025412310639736, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6825372617659788, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.13453927150397377, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.10522974272748564, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.22055493694673897, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3931965048763613, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.13755274871304535, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.10397715306705207, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.13899941210887606, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.10947303419437356, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.13725861056573663, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.11147384852362276, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.09122941759796505, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.13725861056573663, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.11147384852362276, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.05194789152811301, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.05194789152811301, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1350501875730652, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.11147384852362276, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1350501875730652, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.11147384852362276, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.06821148611313624, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.07933317425857943, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1350501875730652, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.11147384852362276, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.09408024740752835, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.09895358918308976, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.07839951405031013, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.06261389497098568, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.11538184104597694, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.09204268041910899, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.6885326214539055, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.8229812189228393, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.839587623092576, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.9096086668952811, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.7267072830982378, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.8396959977515368, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.6374950652411382, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6643984252563968, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.760856626273165, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.881580297011256, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.760856626273165, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.881580297011256, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.839587623092576, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.9096086668952811, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.5054091115759235, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5779798191200329, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.5054091115759235, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5779798191200329, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.5821747317554493, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.7539119883011114, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.6885326214539055, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.8229812189228393, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.760856626273165, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.881580297011256, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.5824621545691198, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6375144448777752, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.6885326214539055, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.8229812189228393, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.6885326214539055, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.8328652216139806, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.839587623092576, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.9096086668952811, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.839587623092576, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.9096086668952811, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.7178970818142898, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.8295858852824634, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.251696695878184, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5710821658681214, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.7498810286408993, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.7886148242134857, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3480442076026084, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6142483232997242, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.2861853478258715, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6401604432917332, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.2861853478258715, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6401604432917332, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.2852636439147137, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6732018003142922, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.18409929989356164, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5294826055875641, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.26529518334824453, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.596699960316198, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.412295470431275, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.705800771033924, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.2592170537135687, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.48962631520102895, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.2592170537135687, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.48962631520102895, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.2104783778565715, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6117499551501043, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.2104783778565715, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6074381660797843, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.2371332024655201, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6388309463070028, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4747354911173249, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7360286800047513, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.19850823739068116, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.545876357155442, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.412295470431275, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.705800771033924, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.589811312024197, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5838763481839316, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.18951629567590741, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5476491102899993, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.31072931460421827, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.4306285422638574, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.17401517708317762, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.45006261596496794, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.151240443751577, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4224869587588239, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.151240443751577, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4224869587588239, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.26860011657329247, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.14172292406325543, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4762857001428092, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.1435622311718879, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.45714671497681403, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.14172292406325543, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4762857001428092, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.3022474972507704, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.3022474972507704, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4800955244005148, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5401265646908128, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.35123681047474764, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.18482936243672016, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5172215726655364, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.1786870215027556, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4652953618999841, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.16234676720992364, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.48137970077362496, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.16853790965501372, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5242065098084487, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.1581263594825012, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4522083091147819, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.14384023999987144, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4651629740258846, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.07810235385630719, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.10401577613691954, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.32252336426814965, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2028736642487601, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3614856639698008, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2255489037266197, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3954925749722234, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2927057121559396, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4330945753016968, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.18038302998635977, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.320678468026793, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.272143800067929, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4053920465587089, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3124325727595954, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4415757258745415, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.21850594525107195, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4049269026117245, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.33278034531051726, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.33278034531051726, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.21233470585998818, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3630016390465325, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.272143800067929, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4244951818012383, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.21850594525107195, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4173980390626746, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.1958598294695433, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.39989578460637004, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.22325877055095214, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.38760873730223866, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2741229265391949, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3721657350281369, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2741229265391949, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3721657350281369, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.15573188896329046, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.34400122334184813, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.16170596160446446, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.33805023952655533, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.23398197530631124, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2929807168354841, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5975595069845072, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6689604664235209, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8010329764520807, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6689604664235209, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8010329764520807, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.14908960803395838, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4761746966391582, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.30778741582971547, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5260758146680434, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.1935951733925871, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.43667702869251973, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.2466674257522263, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.15912425773278951, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.36849788356327, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.13380161378318955, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3620746905979261, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5677534942306638, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.684329671666446, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.623977125888761, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7203812768232026, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.23731319709526777, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.47505928950804344, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.8787419089273848, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.9422733087334002, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3463101483231968, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5237307224806943, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6666935927206881, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7886059879769752, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3854501214118697, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.595779023757305, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.31011575752288345, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5868493159448576, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.19940445989088912, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4140097710901331, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.1999934463074552, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.30520457148036917, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4113125177363443, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.42808075762838727, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.32685141385924577, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3758692873615971, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3734832062562986, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.383916695249631, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3804672236690253, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.433708341935832, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.41719958093258547, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4623764370897186, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3734832062562986, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.44543323722408085, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4678134833959513, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5051480556620123, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.29528242415723854, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.33278285164387655, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.19850823739068116, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3322089542607952, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4028998029112093, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.43422338821405304, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3734832062562986, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.44543323722408085, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.333388748608888, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4116646457580635, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.5894159589207006, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.633437763335489, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.47426640493104016, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.533549979656383, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.46360731056064436, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5726015901952585, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3734832062562986, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.43771936994910393, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3906219304241721, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4740791147276099, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.33573064840973227, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4290137254376642, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.2041405149858879, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.2728627798814474, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.36592034784584504, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6237774736059616, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.8500131524897436, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.573764722928549, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.817979859532479, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.2320305803246989, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6224956012824276, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.21449459478473423, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6528501353073614, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.22449758011137355, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5326826664261649, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.26697411956933875, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6328956554106696, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.27341185048222727, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6411651849711889, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.20121892469391917, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5567939082957601, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.19228544753133758, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5417764305747725, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.4041187386794465, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6888233111124319, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.9086549610666288, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.9538780144669678, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.47629019423691704, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7299685603603432, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.670001214025099, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.876386276114813, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.20891853890830714, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5547930109285607, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5645815242299279, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.8151453923340255, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5972046851135996, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5972046851135996, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.15874376125672243, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5342000553124993, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.24570408832734913, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.17470942957770763, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5699365673055954, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.8482942955247808, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.9256238040654331, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.8482942955247808, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.9256238040654331, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.4388504279172877, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.808070563320424, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.4841156774078945, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.8256726071164937, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.8482942955247808, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.9256238040654331, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.8363600587440573, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.9912737182609732, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.8895260356363631, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.9215559912711291, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.6018154975998465, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.7669980679050217, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.8482942955247808, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.9256238040654331, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.20164065196183215, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.3923533979663226, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.4212555584968603, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.44264089366400194, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.2750774388281557, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.34712156908889796, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.31322885062380607, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.3821582738832969, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3286610737142835, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.385722786993942, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.21171273476282318, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.34680801952866847, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.27483211854002193, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.20951428943259978, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.2989889871112548, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.20951428943259978, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.2989889871112548, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3332411632883488, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.4039853784752083, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.2218501907098814, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.33654368829688636, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.11732201052820276, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.25299836015791066, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.4081354056739722, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.47955174657690236, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.606820630365184, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.7266797931843597, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.392022469660947, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.48059504328652813, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.4081354056739722, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.4731088237118851, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3332411632883488, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.39933193113530874, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3627171579166368, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.42265918111012574, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.20642179908801722, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.19178500195247952, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.3514233824120371, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.3471790743028735, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5354826964964929, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.14528679532351443, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.45626264176882697, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.21146239923180532, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.3672220683588613, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.11365352023191169, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5427832684043266, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.406208369292942, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0993195473228234, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.4660581946805371, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.25034600728678114, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.3123576615501104, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.3123576615501104, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.14025775160081475, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.37306669253790053, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.27805272316398216, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.48815941420187975, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.21267746188711148, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.866397551781362, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.9369949537059603, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.5278627722123207, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.8154379350117309, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.41682189465797687, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7155178722189985, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.37420316460821246, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6713980677832108, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.2615311775021803, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6169169270416248, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.2823945343575434, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.09520646862489263, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.4231985179035766, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.47631009147745074, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6668706097750393, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3312076918041707, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5907489769215882, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.5419642316694008, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.8375813242343603, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.45069082245075975, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6399673599980337, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3044867545327882, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5152255320542248, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.37251337991409605, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5810872572798261, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.19496249079519765, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.3404780052299898, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.3216265867489782, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.36313460166197037, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.648844691127488, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.29018963236087447, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5428475770446293, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.26220676436185975, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.40041995342318953, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.33084780351073634, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5879159712556987, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3222538601891173, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5969957309841242, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.7708186875078075, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.8284786787443572, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.08603520723426224, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.41785071883461133, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.37793843728093646, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6768016164890283, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.3077932519058548, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.175658807429611, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.47157573411560544, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.20217803037339238, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.4743589083194767, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.8492326635760689, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.9048530940348648, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.8492326635760689, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.9226314544302758, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.4267826722481737, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5025756331454715, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.18331704949485053, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.4681674930025697, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.1481394578697113, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.4346232049071254, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.47931444666606077, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5290208228819253, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.26104909033290696, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5956068369645927, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.446338868007041, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6877687248093648, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.32961432415781217, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.4546852631699836, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7654091839521726, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.20217803037339238, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.4743589083194767, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.8492326635760689, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.9226314544302758, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.1481394578697113, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.4278722851826281, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.4952330116157305, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6324855802639244, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.12436722085116984, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.4638123953882846, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.10008350737965103, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.3399171525144127, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.3267507236200133, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.14710052131359536, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.2322531742374544, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.19427446513842178, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.2821379316874468, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.30793937214153166, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.39271105518755994, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.1928576545653753, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.27080894796384963, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.15875722180934987, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.335313700439635, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.21800193956058223, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.2974641182469979, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.20828838183973028, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3002418280717453, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.14942927915034346, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.14942927915034346, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.2234473632117264, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3517185856118227, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.2234473632117264, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3517185856118227, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.07389245013530919, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.25306188056493334, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3181932375842872, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3060737551255425, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.20536337741589905, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.23621003955621192, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.17004486924634224, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.20828838183973028, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.29855842731164584, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.2425056620207466, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3842334002689132, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.14102929105825548, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.16702356077048272, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.32184315197242147, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.8862476419965991, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.9357668560693397, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.5181212746323438, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.6637469240656353, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.760856626273165, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.9001274946261998, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.3019157872635644, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.46982207493288186, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.439933170526582, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.5265527952016648, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.46269559069048716, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.5813300951224054, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.4962806188414642, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.6142754339875535, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.4751132438608344, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.5331125668806641, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.4751132438608344, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.5331125668806641, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.6813997516051327, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.7839574900170875, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.7964269516620873, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.8589615670636759, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.1365189729052536, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.3779992936423843, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.839587623092576, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.9201441893603447, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.5338072105998289, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.6564887791688573, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.24343304284910333, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.4684960451084745, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.003075030750307503, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.45263535623901463, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.5592911781982396, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.49014451735562026, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.6491845645828161, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.013542966735554371, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.5784879107039426, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.5683104420414816, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.34190945667692124, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.38471892347518344, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.40122658664791466, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5789876781041586, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4005128871429235, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5795148403403252, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.40122658664791466, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5934450566124282, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4499781507152021, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5255804479733618, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3033668865762665, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4694590302823891, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4499781507152021, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5255804479733618, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.20664181816537017, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.323301646457105, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.20664181816537017, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.323301646457105, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.31211920341047517, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4859322738480559, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.42902556537105646, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5591695775882205, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4028998029112093, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.46152639159313, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5275959726465806, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5562472188484809, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5275959726465806, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5562472188484809, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4884391791679525, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.465693385480142, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5255781733673718, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5338694785332231, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5680635025297611, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6428616302560459, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.25823077599534505, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.44626329440667106, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.2352761275903773, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.47426640493104016, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4991169086641303, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.2803950119994004, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.45110582913319186, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.2818720423903184, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.3950106268119009, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.28433291815307693, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.48867856058755704, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.16652408234019814, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.4246320308167972, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.3013982925179879, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.21314568971111159, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.4218207204961227, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.28743011594299434, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.2096484850050787, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.23587428664438234, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.3087246122445108, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.32547779910215985, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.43052436336054556, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.3239539356655841, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.2803950119994004, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.442887905100041, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.21951524426618454, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.42045058593468565, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.3807134866446316, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.5378980704892395, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.19667812291861894, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.40012121479924007, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.19667812291861894, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.3952782445462564, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.33156497804918816, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.24709063229627756, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.3607632660254926, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.40533142214730206, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6060585353811423, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.326434800689293, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4480048055728212, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.32594818888335836, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.44761445450402637, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4064626339162435, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5788471953989454, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.26939651839759654, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.49912839527553177, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.18038302998635977, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3896135506969871, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.17793925745339384, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.28012888208050135, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.09585061629230296, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.1893202669845784, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.19923928734822613, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4663425112609953, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3240779549699948, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4812338546765038, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.23384694577370996, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.5096772803089438, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6511024507582726, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.38983691387368935, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5816984404013605, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.45873576799767507, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6043854820928907, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3485638068094234, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5963375644080781, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.1670445270854116, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3361164827798928, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.1845895819969781, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4127769579837372, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.14085916416769417, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.33804825970733066, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.36210097004176117, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5007651303154664, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.3875738939089708, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.6165609648382772, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.19500507550993698, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.45061310264279136, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.35630548449868954, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4957192173723922, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.24826065966265487, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4805206361807775, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.3341317422457504, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5079573405434391, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.3861304705880985, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5248223977765574, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.2974309561779646, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.3992519602378697, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.2745762486209681, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5002984561362573, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.15815783405728007, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.29769975373629043, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.4161791450287817, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5532809970610469, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.00869132029145675, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.05604574546688056, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.5131341853758327, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.6909093149313557, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.3916177035633811, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.6036101600843071, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.45886678012586496, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.6644271635045849, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.23152888964854848, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4544566268147502, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.2819047150780413, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4860988074114069, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.477402596194193, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.6645173033113367, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.23324675880179935, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.3222841628268958, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.12522096668112345, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.1209593074330819, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.11422068431853144, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.12726932659656656, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3016275716365683, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3029955223846001, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.10593991511157044, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.10763180772579908, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.09759160756501183, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.11968354725525952, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.12967052968787476, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.002670940170940171, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.2927796599787371, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.23522291570950735, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.2646447028332218, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3535161075143434, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.17785897894606995, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.11910610888409226, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.15764879870902038, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.12596891625474588, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3187271473320672, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5308594426934777, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.20158074753947833, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4601245932152929, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.21951524426618454, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5863542367683318, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3187271473320672, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5195515317126674, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.2641917896528943, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2476165058078653, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4867146510028071, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2476165058078653, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5008770061123741, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4516694906173442, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4516694906173442, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.16467029855845897, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4015922868181449, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.15774545980684188, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5404685949807201, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.32916817971873047, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.7541859578343534, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8925780650702051, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6028817681965138, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8184572215512641, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.35412968165085734, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6270889448255044, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.25357054476878577, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.44844165354587767, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5718331963429273, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.48255775311856786, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.15844501337268932, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.45565290753593196, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.031835895924415845, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.030384560195149946, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.052306204739960505, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.06823556261259293, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.08464912280701753, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.0668660748745658, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.08214465123593925, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.043422322290125444, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.043422322290125444, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.05921052631578948, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.06555059438405589, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.042535290268299525, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.08464912280701753, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.09113274429924344, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.0668660748745658, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.09023527188022927, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.08214465123593925, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.07647983237297014, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.006944444444444443, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.06823556261259293, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.28479942163807365, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.3268532842861261, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.16442902274779153, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2560785881872606, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.09594785034023696, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.10599592327617327, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.14500387607936882, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.39811631946890474, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.4004997065707043, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.20217803037339238, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.14407576460736185, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.20687913047075865, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.22828954232316065, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.04127554085191462, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.04127554085191462, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.3400215619680846, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.33719880338573494, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.26104909033290696, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2722033387175046, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2116210665015609, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.23932595221309674, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.311012661607424, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.17923344640485428, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2344967623081854, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.2227227312202058, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.267705544827011, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.21233470585998818, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2655126465892259, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.19228544753133758, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.14059624172717672, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.06787002753251685, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.14025775160081475, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2329160565767051, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.13714971174966237, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.40286739446153647, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.1420515519076652, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.42828528785491377, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.1420515519076652, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.42828528785491377, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.31941740500496135, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.4712019523587867, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.2555184829750068, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.445432191505933, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.31310070795186434, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5511747373016408, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.1420515519076652, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.44106281582354434, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.26967784204515577, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.26967784204515577, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.3648139824831825, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.1420515519076652, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.44106281582354434, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.32777230310669525, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.21544992729237186, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.43862011357966985, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.17460005233998513, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.4006764760264664, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.15234610459458484, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.46726908278200807, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.22817395443579389, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5333512480190983, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.22690068744270425, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5346629648266377, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.2696565254551411, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5248534524852462, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.21813311724067094, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.42518332100320455, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.24258882871985424, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5303410271196803, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2524885334037127, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.478828996001936, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.47788634915766176, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.3299895472527792, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.4984179060203801, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.3152861344254501, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.49782106997559544, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.3617640392425988, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5522465855173821, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.24187339355165743, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.25756580609747165, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.25756580609747165, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.3163161243012683, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.23687685391328772, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.4980341077042262, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.33686890954009086, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2569984870940298, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5218743639828287, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2504503449831057, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5561372885863383, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.40043666315630655, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6786513775230063, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.18471706734883164, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.47575293285704157, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.18462053544069879, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.47706095305435986, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.31048078917785243, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.2922953334862234, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.4235355275892789, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.08644426594013177, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.3191159905793865, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5859514292713682, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.5894330056015769, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.7233859304178071, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.17509131039045966, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.29735424498602653, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.16638505369603646, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.43509325019402184, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.1896632646081197, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.4171144217426057, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.3276463794734248, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5691582878087721, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.17976451428761386, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.3585067888170081, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.17976451428761386, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.343282119048708, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.14582148798471004, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.31082696401117216, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.532789925402232, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.27501295888248234, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.6880908437029064, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.8057610214990305, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.5446619336438847, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.7009531476065792, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.31142220978542806, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.44510270344803765, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.4331536929006432, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.6392185687264759, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.4331536929006432, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.6392185687264759, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.18728674627858763, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.3939325010546995, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.1878901082125714, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.2690293752604482, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.4926185189227678, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.22911003908657962, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.47520559779668814, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.3842217688744563, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.5778946517730051, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.42396156294478327, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.5537191420830491, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.4392283615599068, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.5797315907567191, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.4247462710609875, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.5232906543197557, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.3588098099835732, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.5299274110588972, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.3401185071799048, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.46989148838686473, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.3595597536132021, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.48077684581215396, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.3595597536132021, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.48077684581215396, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.37071912413861663, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.5215094709752708, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.3530655323510889, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.5388344492000988, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.40232211736779205, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.5438807582708866, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.3395665608660451, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.4640706250815061, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.3317190808936393, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.4513622173176303, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.3401185071799048, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.46989148838686473, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.3395665608660451, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.48690229401918267, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.25609516769327906, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.44908098526380485, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.22422504713155436, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.22783713906265132, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.38679326840946837, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.10752875081378005, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.3325883825268305, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.20583079363601006, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.210765781867173, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.2130422726507048, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.14625505122941285, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.20063126619058114, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.2115210387013347, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.37360356038205855, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.12597257038347442, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.18700830990734377, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.22271477289353545, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.10459905477537781, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.1880220063294874, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.18813687249646147, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.386314894857759, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.15613122185272454, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.42694636550146664, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.265789590051501, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.1777383985636279, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.16149927362386943, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.15221118163644626, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.3040820655534371, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.687028746592056, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.7656691936879507, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.5948821108337683, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.7754376703404752, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.6740433269994325, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.81875959995474, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4887049965066992, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.569614552439621, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.7248600507984249, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.7668237605712521, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4081124128738056, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6001792982263469, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4354676264744325, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6617572899591347, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.26622044491524727, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3345692380236303, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.2665965212441191, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.38231331369588106, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.5105137893788634, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5591104189798438, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.5502408885295141, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5654268254662816, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.19445724895759758, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.2659249673909068, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.7092625431287067, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.812789308577739, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.7254204378043243, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.8078563607246445, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.6823528626588446, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.7340217379632759, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.5608429423716823, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.7377212542047896, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.5184760552547103, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5428419284565644, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.20333448190047881, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.45845403048189814, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.1243018504102695, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.2432739373207714, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.5577288803545524, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6039990179307908, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.40283046047758825, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.6078521036010628, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.583899712825618, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.6318158012380937, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.5721933920005094, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.7452043729061871, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.43434098825795264, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5541452605668559, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.30978939072405803, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.47835270710065314, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.49173653802585443, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5612260220369396, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.47992366044240764, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5677112072864312, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.2837655732884958, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.3548188785647291, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.2837655732884958, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.35108999563681076, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.5751167640035276, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.667747434082355, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.5303402316424991, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5507521834828499, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.34335742724395907, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.43839021069555023, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.38815593666004833, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.590684675442797, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.5792154834458868, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.6702416408010979, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.5486578343933162, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.6828178383434446, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.05060207207173417, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.26022022728092614, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.5366441955817576, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5916693427343216, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.475628933599271, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.4877017847791856, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.023022429590310714, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.4755160782268483, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5834441686450073, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.3875738939089708, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.4061202407032717, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.4478039777693801, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.4520078853554537, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.5013053504161699, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.5258745854532912, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.43310177167002284, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.47137144154052957, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.20330750836901912, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.36048721816693735, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.24356042138823908, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.39273557843889717, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.3267294026204632, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.4333271065990519, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.19406071325325308, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.3533983915843073, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.22205108445785451, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.3592356435419641, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.27213157170702923, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.3876323907884819, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.22936436185450496, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.34768470593248824, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.0032051282051282055, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.5502408885295141, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.5526384138963923, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.38630936190395015, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.43999811415716666, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.3479992308096562, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.43167311437074884, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.21171460625310196, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.36791058458941883, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.21171460625310196, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.36791058458941883, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.34019506273883837, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.41875873911174444, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.1983704639040531, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.24582153349970853, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.335417686033102, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.3669951214641638, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.14669686413046806, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.4859054130771095, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.2034208182228192, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.574929092637314, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.2034208182228192, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.574929092637314, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.2034208182228192, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.574929092637314, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.19300404208836477, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5580822623950328, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.41837196571723934, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.2284240389125248, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.2546939135935566, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.2546939135935566, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.1590542798683595, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.4277341159613323, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.14669686413046806, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.4749329376022692, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.2702173841437742, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.22878386498145054, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.626426212930626, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.14728212724124629, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.48836797124457354, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.1885651765184848, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5377638658289048, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.433605176180698, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.19674214866452416, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5589633489315522, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.32342498551930216, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5669007627289538, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.1476770699073328, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.14757581190431865, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5161370290424468, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.19394698940390126, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.26825696990387976, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.18661491233517533, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.2813929671493715, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.18830095106396066, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.26618195694772356, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.043577915062129685, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.2047856964061869, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.32185278974853804, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.42815980887088867, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.18818468056723417, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.35937272275747895, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.17339704923260021, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.1992260278488686, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.15025290533149663, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.18661491233517533, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.24224327949566424, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.1742174981041678, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.38785611216800814, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.5025112453850064, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.2706380285588004, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.47250854082936017, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.19394698940390126, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.3735791805273296, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.2291421308389693, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.5494313751991973, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.12607704921829502, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.272446590350039, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.12673718536830808, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.2342177526227224, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.19729680466426772, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.1971221255458505, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.09784168213672303, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.29894945474335277, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.1333681278040752, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.14453940362467121, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.23375499051160153, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.4978067743410731, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.5375886122494208, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.1261312269838889, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.36361963826047305, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.3444628323551915, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.2042128370387497, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.3883201120549514, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.19978801989899175, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.3566648944161287, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.14189763458782154, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.1396681036260065, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.16770949497883902, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.6695580814326164, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.6418894961106189, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.5249562833679632, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.5698312265671861, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.3090705808198716, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.42787161685907876, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.22986864711558166, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.3934151437056127, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.13309867982541607, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.12013269496147223, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.223476690880495, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.2246029757863831, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8003203203844999, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453478043428296, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8003203203844999, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453478043428296, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8003203203844999, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453478043428296, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8003203203844999, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453478043428296, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8003203203844999, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453478043428296, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6417603075499863, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.37709297891717664, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6881502501430368, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8003203203844999, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453478043428296, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8003203203844999, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453478043428296, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.4765874091118851, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.45911557772276623, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.45022125383821326, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.46874267375238576, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.6350593429017282, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.6022395694696409, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3407065041529668, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3482814151315599, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.7215691881328408, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.735100789804592, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.6219720158712322, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.638758039725182, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.7215691881328408, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.735100789804592, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.6517181973179901, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.6481167033801164, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.6517181973179901, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.6481167033801164, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.700487718300918, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.7205373993220106, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.6219720158712322, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.638758039725182, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.10143591215942639, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.611843760819802, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.6194911484836914, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.5796016058049601, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.5650600724216098, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.5894567062209923, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.6051783687131701, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.5907010930652489, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.5995581839975431, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.7215691881328408, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.735100789804592, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.286085230494539, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3712901520520525, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.33500599401126563, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.34371117385240735, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.630923553986829, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.6458808155334796, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3013901676230198, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.38106012955734714, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.35187745073108273, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.2850647115160651, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3190905975399158, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3175465093373464, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2028736642487601, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.43458947791319813, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.2614502000808532, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.2614502000808532, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4033902612785559, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.11434800023292974, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3575409421974955, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.333187321272665, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.13598345546333285, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4135396704381328, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4007426083582278, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3101159279982649, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3909330178955319, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.12962472880491877, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.477009194099272, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4077182885080447, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.217295409663537, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.12173115521158184, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3904544509639755, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3820562306791339, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5488313413379253, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.38091370416670794, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.540550443602966, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3820562306791339, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5488313413379253, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.36314748337164254, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5392658386159207, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.2293530951556094, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.4689601314620498, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.2293530951556094, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.4689601314620498, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3820562306791339, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5488313413379253, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3820562306791339, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5488313413379253, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3820562306791339, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5488313413379253, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3820562306791339, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5488313413379253, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.28552619466205187, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5197914639547802, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3820562306791339, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5508290063627067, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3820562306791339, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5508290063627067, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3820562306791339, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5508290063627067, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.35459684529390034, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5519360558961294, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.35459684529390034, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5499993547125768, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3820562306791339, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5488313413379253, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3820562306791339, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5488313413379253, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.27062395495883934, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.4407436716645838, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3711335186021823, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5595427509161435, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.2294068720558097, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.40458364050078693, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.32588643749980295, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.32003170276441123, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.23088247483586974, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4093450185788297, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.23277990398476162, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.41508156849006633, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.23274285105688466, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4091360043642453, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.23189835231884592, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.44157797833899437, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.2324422363331168, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.3902749945003648, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.2324422363331168, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.3902749945003648, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.11917756990194882, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.37850093315889116, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.12901518038350407, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.42910669735129914, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.2185876061269953, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.23251355381714656, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.42986991593991275, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.23189835231884592, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.430897509663055, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.3330732444230803, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.23721317187079113, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4319465813689286, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.23189835231884592, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.40563324637329695, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.2403479325641683, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.40502552058528674, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.29417113956364643, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4653698220842079, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.23088247483586974, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.42828303349678104, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.45237912327122276, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.6299071573751139, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.23736810439041953, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4004852416401387, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2916261378761629, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4106520926894174, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.282764733088686, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3743678965131091, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.11008681552948653, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.29720349300427057, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.15511550090520096, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3922184662482167, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.31430120091187586, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5013155459452984, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.24715087322306514, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3982414931184515, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.24715087322306514, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3982414931184515, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.16451929399933107, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.2957279302594959, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.33926500357187594, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.518547841500169, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.17923169300061667, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.10793583834163357, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3079497311888636, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3049259527424873, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5152417158067153, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.11401282249739858, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3117911565455793, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.34019506273883837, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.48708558391259515, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.32857675712890333, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4464566565029437, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.17260955718893642, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.33184166448858593, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.1676136890247661, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.38914692664434314, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5605065818946205, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.586853267829013, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.36300296341860155, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5816676674074003, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3525399760372503, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5798116969849163, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3927053212677373, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5108598154804425, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5369357392301045, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5869601651194081, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.32163989714697483, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4414323713243047, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3722001929300059, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5252698638532942, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5369357392301045, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.592567167759071, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5369357392301045, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.592567167759071, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3514475288270508, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5361569875660316, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.30653735217115763, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.47019788701989235, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.502115172633202, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5841884402878015, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.20913838136220486, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.40960094031121963, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5369357392301045, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5869601651194081, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.27914759735007616, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3958350231734361, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.20974733068050955, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.41455868084196934, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.20913838136220486, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.40960094031121963, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.45729707612649767, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5446642967099489, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.16086531618356015, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.2797876941198672, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3277803741755935, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4267708983045122, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.28765408533715414, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4046608868073569, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.1819722649161304, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.44254730215235283, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.25291831689404154, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4482360279074225, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.279600269133294, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.48172049854477195, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.17087214948464993, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.36469648037819463, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.22872196013470597, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.45089857576633846, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.1258907882951215, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.34143648068854054, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3312427799844288, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3489311423593648, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.11986809949741643, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.34629467658248214, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.16978008124086652, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.35898786835400787, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.1685048160073683, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.17550354183836317, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3977547521857469, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.18910054845801366, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.46711157655346974, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.32666181171942305, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5670457942911707, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.29175929784144866, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.45299010750030405, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.15919612785658785, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.44556760211080115, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.30531851995191167, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.24463910693302512, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.2908660541001102, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.13305199541830684, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.43244987270004115, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.3377385620641691, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5256128450453542, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.44776047557667586, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5801193947715436, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.2875583820017638, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6087635830564418, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.356937545593233, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5492415189153258, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.13836903384315105, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5516548411073219, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.28295596283263513, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6067794553589253, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.13597796343834895, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.4484665724193764, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.13597796343834895, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.4484665724193764, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.20863283213455547, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5109257435313587, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.14153945350617025, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.4466955096469747, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.14785967807080633, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.46460090410941435, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.34589895849033103, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6136307264772042, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.23321078701700157, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.4758513477167604, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.2810551683573811, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6477048453606161, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.473265686519562, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7021422985630228, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.473265686519562, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7021422985630228, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.13072010980679707, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.4862878243251588, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.11810019511256618, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.3708545152745943, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.3423375720396189, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5343801172775681, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.12546912767038895, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.2651343523961406, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.12407216162020399, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.2664864612493293, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.11261597894135422, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.25234827342962907, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.12757855945289526, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3385577201847465, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0841173493088763, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.2901271494395167, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.11556653761629153, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3140069931838876, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.1175771442804648, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3103572690939351, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.30248425155787256, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3090152221044926, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.12629279972753293, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.2933944065312711, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.08089898227976655, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.25718236707018105, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.19563561963987236, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.1055629358593665, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.30482671461107386, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.12508519872397364, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3057389264653955, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.12363251371327445, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.2692822154793075, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.24470651147480013, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.08535078026464633, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.2981048700438665, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.2776405456134286, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.09410612421964877, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.13696035837771334, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3335388002918436, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5322520826224556, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.727131667480615, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5266403878479265, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6838125749299477, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5276151436342643, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6871514991080862, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5276151436342643, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6999124430022288, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5322520826224556, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.727131667480615, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5276151436342643, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6871514991080862, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5322520826224556, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.727131667480615, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5276151436342643, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6871514991080862, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5276151436342643, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6871514991080862, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5523722682139371, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.7471196627888963, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.49611334615935637, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.7296403081651096, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5276151436342643, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6871514991080862, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5274964121279998, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6941952618694388, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3763278728427448, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.47453622408885904, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5523722682139371, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.7219229057874782, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5002744991426422, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6429454824803486, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5523722682139371, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.7471196627888963, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.3213275671595753, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.40961777715484393, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5773184063472755, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5331034421473965, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6806929097519565, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.2092659579124333, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5354766759595367, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.1284866896836278, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.54959682211865, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.3099627272480552, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5376391724348849, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.15742483335373852, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.43519517439687405, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.5074919773327776, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6528956124603694, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.32705341718250747, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.631354608587055, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.6144246566045058, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7550732449201221, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.49582717346593747, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7377780892834709, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.49582717346593747, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7377780892834709, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.2238855010644693, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5288881528593262, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.06295028070151909, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.3271734324151627, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.5831654195482086, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7601444481236787, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.46797289640794176, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6053337914448172, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.473016146288238, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6981990328342826, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.5023049672447087, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6885140390468562, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.5023049672447087, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6885140390468562, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.33638190436371745, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.3649580097673384, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.45686283928900234, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7163619637625416, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.5703017172567459, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.78509136371851, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5226605904538532, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5272140519221666, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.4692685009782657, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.5107231090800101, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.7257353760607679, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.2680165156355779, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5590529072823445, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5470973834864862, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.36023811550441615, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6357024906157128, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.36023811550441615, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6357024906157128, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5563318425026342, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.45238747470939605, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.46536973216841626, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3200938205435179, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6393945963053513, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.38037736789549603, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6906084512101429, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5478041897913022, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.18882437844970767, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5789754712947318, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3200938205435179, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6393945963053513, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.47013511859874646, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.43427164452809086, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.526079635392936, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.32001589569502475, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5274343388526991, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.31869191523653845, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5973293882694002, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.3128418715354195, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5961262622141211, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.302221525161365, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5926217012511299, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.289331164128846, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5515215768338904, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.30470915491420003, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.542875812219914, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.289331164128846, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5798018459101258, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.26144021961359415, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5270061175207554, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.26144021961359415, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5270061175207554, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.2994985311892038, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.611702219968759, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.2994985311892038, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.611702219968759, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.4376035090962864, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.312793730905921, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.4874766693766197, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.41841993715998216, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6644204176723965, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.4259108629005092, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5685406243620383, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.3162277660168379, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.533113142157349, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.3162277660168379, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.533113142157349, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.26596351950249564, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5819860704357703, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.19037861963633804, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5872021106043722, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.289331164128846, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5970102524600497, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.6666467303030572, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.6471929785766445, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.23287896954139942, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.34835288582718865, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.23287896954139942, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3456033257148638, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.20038908500140973, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.38365854681342043, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.1845895819969781, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.33578716975410133, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.20038908500140973, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.38365854681342043, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.48682021841593603, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5363950719917215, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.48682021841593603, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5363950719917215, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.15071676257541072, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.28916309026824916, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.20038908500140973, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.33634707243903805, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.10817340917404702, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.6681082569496674, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.7257478469803625, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.20105373454060027, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3461597307805182, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.18605335292758288, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3760221461307777, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.5998949116852975, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.6761932468711475, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.48593738187963054, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5468690247087608, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.20515691941627118, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.5398995684986874, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5601040209287937, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.3664818394063563, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.1404678994830558, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.470059848862195, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.4348498215169483, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.4348498215169483, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.4289684145996228, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.42753415245967574, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.4126375622076229, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.3892646190457294, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.3964051686924964, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.14482189302397735, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.36685295299999376, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.16703297708538037, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.4783270379573212, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.2739399901032569, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.4227538623950431, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.4232731925426904, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.29607319718105773, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.14523626605098836, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.47292367331792823, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.1759326236712771, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.6193592475350455, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.38894730753477064, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.1915653719498864, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.34479320712524314, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.1475256903804988, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.412830471303484, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3097707930450913, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4491537749988506, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4088404017667301, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3026286472844013, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4888814490763881, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.1751062735415077, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.441470604570403, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.37429964421261713, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.31505045452389213, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3361093414752474, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.36806833406434675, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3574029228231044, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3835415349913397, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.1440453111117146, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4724943996845495, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.30778741582971547, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.44804144802573814, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.2744668544653848, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3540230245951609, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2167710345443372, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4369788276869796, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2608721373229356, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4425523567948261, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3771475821372856, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.29089871340115286, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.35722668031235094, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.359712726799806, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.30742231621284044, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.33091815669121233, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.1627690505553611, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.41167257464400747, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.3111260879051516, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.38964624025381533, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.2919152112878164, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.2919152112878164, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.3074764316375467, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.23220245818800425, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.15587413918588733, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.22637791583194047, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.46140916594629944, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.1974060384182083, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.4392402072324801, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.2876656267355248, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.3597494652312718, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.3564249821229717, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.3526827693240968, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.013937336186971022, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.4256815035543486, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.25381494737245897, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6023865926498774, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.20828838183973028, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3919127720212458, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5056837352118368, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.21800193956058223, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3968860255103736, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.378786561257948, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.7119470941244537, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.34537865578685034, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6888865426349207, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.499435958665741, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.35788354361456914, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.550328705070924, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.35788354361456914, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.550328705070924, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.2612013339983373, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.2502511779218986, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.11104601968657288, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.37257484093951504, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.540889451315855, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.35597394633234813, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5408630324572982, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2910873658777246, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6726279357179137, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3259889346257788, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5117673950877254, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.21675453206953177, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5731679833174212, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.1423071532720465, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.35461510790456463, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.1531682455208201, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3155588394550809, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5004428568600464, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.302221525161365, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.494647923435533, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.14239015212277228, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.39011057458940945, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.39362191501483834, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5349231274294316, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.136176992562601, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.37233856369393126, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.31913543337589395, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4641046311505297, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.3191922067457696, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.46967594996871187, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.3191922067457696, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4322938742789178, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.288118595440787, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.288118595440787, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.28416048502946245, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4011088672280359, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.39369193933754015, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5367684135815661, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.30470915491420003, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.45679896683951937, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.3792611111696206, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5498654958143645, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.3015899197760847, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5407786457967327, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.31322594245234425, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5703202724838755, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.11894772669130835, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.3633504363047486, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.31630329237321364, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4566752559119006, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.3387725035817808, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.003987240829346093, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.2837655732884958, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4099902893843507, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.379527516398695, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5401652761721537, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.4063299618766256, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5674287431823071, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.4216383984557386, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5776987776765293, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.32053532009253083, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.43622106696027124, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3598105144545827, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5180775171589359, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.13877365047234683, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.40018004900198584, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3312615528522037, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5044209915579331, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.37059119553219877, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5193244779395523, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.0963782232238735, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.18239905452001884, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.42530943703228064, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3424536495902577, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4759175045802691, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.19988815653705452, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4045022211830996, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.57703623574782, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.6448162735306054, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.5596234868159183, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.6146584645720056, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.19839774013808123, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.42289602914471713, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4036833691062196, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.25214095994813157, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4596858639513775, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3383930155468444, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.2514130629286468, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.29615165360116247, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4985825059590937, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2878333693524178, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5587364816350978, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2878333693524178, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5587364816350978, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.16363329713787583, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.40856136233285967, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.14087105852460519, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.41054287635417724, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.29036802523421973, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5539971386602764, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2878333693524178, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5587364816350978, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3642301318550941, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6103580099130892, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.24479879846461303, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.49899959922136505, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.24479879846461303, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.49899959922136505, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3642301318550941, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6103580099130892, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.21993826115474513, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5016488706154938, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.2651383005722524, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.44609755185666794, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4419435974935697, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.1470815798406082, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4857421015300819, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.489454127538689, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.23769560475681795, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5396996022143221, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.2992831985740625, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.276654640985355, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.12030803390896863, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.40343650886859533, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.4718372009351201, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.514371044868007, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.2167183006379162, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.22719782145384587, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.35412968165085734, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.4347460767932904, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.18295654224495206, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.24542641806876497, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.3308548466798256, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.41359956263207037, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.2204887282071633, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.29000609768684643, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.14962848372546667, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.23352930000467023, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.1667955161379731, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.24294928497714877, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.2646015952359329, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.31570636967571897, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.23077125078149244, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.287152353520795, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.21538202046194896, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.19698014877442954, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.35412968165085734, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.4347460767932904, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.2697856975860103, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.3242134827269961, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.15875722180934987, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.21215881138968976, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.03692675325478651, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.09241677321543408, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.3308548466798256, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.41359956263207037, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.15979727306988675, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.3237722713145643, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.33258947777791037, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.4033582072599889, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.46631004676740256, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.2920066581890844, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.36052310235569734, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.3813114671801389, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.39681350602982185, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.45701511801046707, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.4585873077614039, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.018115942028985508, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.6061105881801929, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.6319310381959741, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.5549270001127403, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.5679891636123928, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.21061661601439866, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.25309156506322616, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.22979089497880623, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.3353527174512909, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.22979089497880623, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.3353527174512909, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.14098829079624148, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.18187234472987732, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.45701511801046707, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.48271019772277457, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.23442577260681427, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.285898366238754, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.4223157402750782, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.517518936974854, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.6061105881801929, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.6319310381959741, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.46507550803536196, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.4753776301541142, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.15756009499054252, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.1917949409166148, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.31748752251344486, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.33242017378335365, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.17567885044803916, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.293234132230594, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.09900753693425239, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.8164795700243455, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.8378701010587658, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.3841997815718642, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.7084389853258415, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.3212854967972961, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5882101692160411, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.3212854967972961, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5888103425295602, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.1767874865365185, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5011179630496438, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.4853620477754487, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.7648536899268785, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.2634592129280123, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5830409447205244, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.3032929624979452, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.6298789412741047, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.26380647520822076, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5568746681062378, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.26380647520822076, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5568746681062378, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.2634592129280123, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5830409447205244, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.467013157950131, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.74376385639865, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.2505523539251516, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.58636964504927, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.3032929624979452, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5928831841690573, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.49083010926596, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.8148280157752726, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.5298537180556899, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.8227489319779633, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.3074203461059601, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.6805350756811194, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.41247506600834843, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.7230635818151804, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.43000007605628365, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.676922662116625, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.2603279434460069, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.15094258117090345, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.4957807154774304, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.46175494867931294, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2460137257692754, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.49701225041511987, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.22229849552064015, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5180400950710813, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.4768696920928415, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.14402386030995318, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.47894872241562697, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.340936057128646, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2382939784364074, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5141279222953337, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.21688801920091497, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5164502018806274, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.21688801920091497, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5164502018806274, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.39139435171133763, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.43354092326762567, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.22060236130929797, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.47227062964235317, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5039471118843061, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.49622717544883765, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.21059469740788836, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5326130101027936, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.44503579680221195, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.4828640829097101, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2853837110567848, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5109111081376363, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.41754208336163356, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.491135543219209, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.09970617113450042, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.4157699695564606, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.25678404806291744, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.4451755318490681, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.2553868974229174, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.19334524624566626, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.29379118299158063, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5236831459257523, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.16336470131023162, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.42952676125304995, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.11902001907030836, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.37006869202652387, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.2066736214472914, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.43330165155730577, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.2066736214472914, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.43330165155730577, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.23220198081265267, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.31389820868327617, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.1884575363509431, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.2797941128523861, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.25678404806291744, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.47809631145021503, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.250737833894674, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.41575118479921364, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.3073899153729068, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.24048179389846544, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.4185563869439441, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.24048179389846544, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.3887778043362071, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.20178615855419402, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.2894670683676958, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.31029223530511546, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.49153218387646985, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.25748661016289676, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.5470575132556847, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.5006114305135408, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.7491334025878413, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.22419056820298167, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.5391137756687266, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.17640520319198946, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.4734108339946783, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.12607162121844206, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.4021408353441737, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.1691896235111172, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.4942911926258494, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.13129018476513854, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.3768713474616184, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.1320400322138432, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.37069186801926757, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.13471766853689124, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.3119115134466902, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.20910516205585403, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.5402831897795416, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.1664517270460891, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.4173599374906763, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.4545984910679452, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.6246203671316624, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.20040945203683083, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.5452353998273013, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.3244913214932798, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.5736635347103882, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.1333675348352632, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.41536428053084135, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.15949080497053747, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.44751332637022834, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.4451296305423127, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.11503804692641476, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.13232291594986312, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.4925850413196293, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.37652399404345116, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.42944002472700066, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.2833048493206313, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.43070982670283003, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.2833048493206313, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.43070982670283003, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.2833048493206313, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.43070982670283003, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.3855945567658395, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.4425261260201938, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.19981064492544628, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.3919499606072391, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.29041376174024985, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.44402718863249474, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.2909755167251368, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.3877276005300317, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.2909755167251368, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.3877276005300317, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.3762332646616698, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.44173278869750104, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.3612027039755029, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.42754895008207144, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.3107474604434454, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.37453928297488676, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.37652399404345116, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.4474954196802953, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.37652399404345116, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.4474954196802953, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.37845413812632356, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.4493430485139032, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.19789890035445162, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.3035656856094186, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.27882795137246297, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.3921594573906297, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.1741601767923143, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.3262568203835466, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.18059957192216716, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.2915951586142145, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.39732265306094305, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.4578339801815961, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.34973466192007374, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5437131659817407, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.21349332971500937, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.43940628135351467, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.22437887629721057, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.42808989474838754, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.25906260375197115, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4529830757711879, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.330771698935835, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6040822481559811, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.19609258298805401, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4573617031251922, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.28061557610476007, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.560457154689433, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.2778391283025028, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3612052784705575, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.28199624764312037, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3738706207569365, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.21094997405663482, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.39139474899337623, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.20957556911439226, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.31847789452338926, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.34016424094441117, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5639990181718147, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.25730561864888907, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.46584286350414655, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.2308701796614315, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.410716638103963, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.33958660128607676, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.537913519711218, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.11579175680314889, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4260038580397574, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.18855406981696507, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4726960575508474, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.10751512066844296, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.38028346418696723, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.2516546237169354, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.4948297815905772, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.2862280732631754, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.37692927868434317, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.18002829271425147, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.403106948979652, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.31961066230165364, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5296986868661864, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.2617850444952687, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.4734548340984469, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.26339057217264256, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.44366430876981544, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.2722704374402053, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.4630549375673026, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.08319794225878227, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.3214719120157045, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.015108725587447323, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.10794551001487797, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.28456215809018903, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.4526033316806777, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.17278330851481746, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.43449724374841947, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.11161815472639028, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.2516546237169354, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.49035700707239827, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.15723078586799477, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.4181020982953858, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.12118292800678573, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.3757003782374596, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.40444733722022824, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.6027382652603281, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.306542338000354, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.4638359231061822, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.08619971065255716, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.27348840811300873, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.22517504604059183, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.18555209289044108, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.41750565818533353, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.18592370210838877, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.4365745598211089, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.5931895310669659, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.28761252282719213, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.4827207642978027, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.11508711800431275, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.4108808711506724, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.4700695381391195, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.13254512146117509, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.4538619157918892, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.3051552202117091, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.3051552202117091, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.2487541869141638, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.2668685259208986, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.11305182785450434, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.16927318970546587, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.4389579378602983, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.417766128554039, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.12219667481477892, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.2978425654839479, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.358147858289425, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.3897036060558728, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.2032300760326944, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.03546365219964397, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.262306546138322, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.33626043540904865, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.538764562032814, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.2560635441623709, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.48856557052258637, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.2560635441623709, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.48856557052258637, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.255778504657948, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.48897269386714304, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.2560635441623709, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.4780977367484985, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.24708913859051898, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.4688632265816501, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.2560635441623709, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.4780977367484985, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.10701006234065982, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.3553402191980576, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.10701006234065982, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.3553402191980576, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.17631230873436973, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.40948716303664606, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.17631230873436973, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.40948716303664606, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.16436148154531297, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.397352028359245, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.1065411343722391, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.3734232992811716, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.4126320391097287, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5775573112750654, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.4206365669817144, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.580271336573232, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.31191777012681615, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.47353558787477906, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.1813585789583184, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.418713891470244, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.28752161537963966, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.08447773742536654, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.2971141035174409, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.08036517486170719, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.35080892963779386, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.4257336921268292, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.38564750221750704, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.33113385685815255, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.38179457014405943, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.34753315047563166, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.6686626267078195, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.2303529811975853, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.601450817823238, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.270465701330031, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.6186468607030233, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.30433571457093384, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.23096606881002163, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.18169892215734937, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.10970704443189919, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.11556184719177061, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.3962645069468096, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.6785332347807157, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.22756030775408279, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.5355940887874694, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.40885007148965863, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.20313943660946668, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.49835637707333413, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.2196153013796637, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.5749676939081495, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.26380148805055614, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.10970704443189919, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.4472084730734164, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.3538049406567624, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.5074236666326171, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.3353862189849067, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.14710499415347322, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.3568052877265287, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.25318946083294674, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.15865783148776286, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.33433809002301496, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.286272925041448, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.4498943788008491, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.20022731236728245, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.42151488713496327, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.011268318581249104, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.10353631565299981, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.009582622912019589, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.10219597724178935, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.09831402774544744, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.01915086268707571, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.117952323965113, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.18846978629906425, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.4536070913655126, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.1500493128321916, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.3450393290642532, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.298553116272535, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.2910873658777246, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.4167177150134637, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.15865783148776286, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.3481222132322815, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.14247195164848236, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.02197369741456696, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.12974998979978772, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.2887138086538547, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6342291345998248, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7013062757071812, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9303769449292738, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.2381658499765768, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8492326635760689, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9063898435384111, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6526460174517784, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.8298157168649672, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5309354663044072, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6756014232714684, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4529852871970908, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6941474239078328, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8522456714074852, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9096914044088521, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.9457416090031758, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9892952933418456, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7912619863720214, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9129058871674676, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.9457416090031758, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9892952933418456, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.37589902061551017, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.42554151277542873, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.20748131961458333, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2716205232346228, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.20748131961458333, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2716205232346228, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.19272923456045185, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.314589204347422, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.20748131961458333, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2716205232346228, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.31383720140423793, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.35896609082765174, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.20748131961458333, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2716205232346228, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.17258341334464164, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2676886314602364, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.17258341334464164, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2676886314602364, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.2562402498959597, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3462132320098601, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.13020406236745924, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.1949478707854044, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.15751868278140355, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.22016962866538542, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.30613574556266654, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.36162356523761796, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.30613574556266654, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.36162356523761796, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.30613574556266654, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.36162356523761796, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.30613574556266654, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.36162356523761796, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.2980986167657195, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3657032294841098, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.18056556996385742, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.29290088416153853, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1277700534498365, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.20846991452438368, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.25985341959039815, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3462132320098601, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.42988105429544615, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.7577244658187771, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.5366411241731205, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.825566494253596, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.617939643800199, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.8665162960307256, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4360038791211645, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.7669087484597642, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.45633698079947865, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.7873539040299812, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.6153267326643309, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.825440708536967, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4135171000263379, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.7050151549073953, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.6907573115737006, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.9077913232255387, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.6907573115737006, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.9077913232255387, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3345794609803645, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.7523344918083558, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.617939643800199, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.8665162960307256, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6099202439984865, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.46997395980026974, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.8114935753258365, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.5602926888650678, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.8525951197743737, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.46997395980026974, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.8114935753258365, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4547722460981925, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.793631811653261, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.617939643800199, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.8665162960307256, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2227227312202058, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.46376525111514705, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.44338575968779337, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.34537865578685034, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6736450219247083, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4165530720734658, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7027805129995731, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4027788021844849, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6872835607174038, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.40245827940445855, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6664090181705107, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5552412314880962, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.6976333495952621, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.8331572107884448, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.6976333495952621, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.8331572107884448, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.6976333495952621, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.8331572107884448, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.6976333495952621, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.8331572107884448, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.6976333495952621, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.8331572107884448, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.40245827940445855, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6664090181705107, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.5478437342181306, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.775208668957614, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.5721869364474972, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7849794664517259, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.5440766840557734, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7530101164980872, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3623885503140912, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.635707923773145, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.5440766840557734, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7530101164980872, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.22447836580911282, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.635962708232662, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.22454359390997047, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6008671717139555, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.6537813760269277, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7617489761353242, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3964122180109575, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.584540734626554, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.6537813760269277, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7617489761353242, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.41307323705325416, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5785653391533346, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5248587176134882, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6664855309004869, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5248587176134882, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6664855309004869, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.32797138117025904, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.48645628248697975, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.498704623570665, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6478746389895599, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.498704623570665, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6478746389895599, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.498704623570665, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6478746389895599, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.30654117764398475, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5078250490446609, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.30654117764398475, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5078250490446609, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.2280299254440877, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4447177675003817, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5069147001007074, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6481561033107894, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.30783338870673943, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.498704623570665, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6478746389895599, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.498704623570665, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6478746389895599, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4549681528678131, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6164314607426773, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.498704623570665, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6478746389895599, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.45686926996058685, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6147353968946632, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.2081154202394501, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4040637852369789, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.062313574266204104, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.35012358768277246, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5816664251371266, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.17729842264695017, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4103582047611184, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.34895836374229405, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4767378358574124, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.4010889714538991, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5642546048162433, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.30145280436636923, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4729753929525169, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.21992062963866632, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4070442354164886, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2622716439052442, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.42384915893461766, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.35210829264331733, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5239651686730163, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.24831757596380594, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.42241157904531507, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2550784982476315, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.41390693736043466, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.38732841080078323, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3556696449699231, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5043507390160538, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2830789070123405, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4900779362555344, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.30690336937373786, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.46380735910652676, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.23632009599741205, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.45434799944761456, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.19464521962073492, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.38584042605633057, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.35548377438423956, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5150536106864393, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.35548377438423956, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5150536106864393, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.31410396987414585, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.35098096867859657, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.38561859819475125, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.8958039312312598, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.9382091007325469, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6433799261824519, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.81037697367602, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5403356450597102, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7639130574395125, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.833078701050083, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.9482515348146272, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.833078701050083, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.9482515348146272, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.833078701050083, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.9482515348146272, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.833078701050083, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.9482515348146272, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5815699184831468, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8158797976578578, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.7216597075217096, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8647649160356543, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.7637483416234716, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8894308321293988, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.833078701050083, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.9482515348146272, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.833078701050083, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.9482515348146272, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.8097013849965253, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8896806148658662, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.8097013849965253, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8896806148658662, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.833078701050083, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.9482515348146272, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6161420984415483, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6945809713247855, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.833078701050083, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.9482515348146272, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4500531895417844, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.43027065541050147, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4933292241270431, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5225247297523148, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.23487811400114963, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4062284746604391, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3763743474188506, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4017565065239436, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.5135063580594283, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5263652476607005, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4933292241270431, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.511824430191619, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.5805399561362194, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4810464260105228, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.15764980576646254, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5717865334172536, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.15764980576646254, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5717865334172536, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.21510618470971102, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.41380245501613677, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3489214645008508, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.43287961433058714, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.43081631154211786, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.49539605131242165, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.44728880966754114, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.47426640493104016, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4602272932792115, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3489214645008508, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4519496200669607, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.49539605131242165, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.44728880966754114, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.49539605131242165, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.44728880966754114, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3917196589390866, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.555059558533586, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4825434542324755, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.5617848264135781, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5570886750436929, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5212982931053122, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.35319015092357736, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5822934956325967, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.35319015092357736, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5822934956325967, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5095895501997145, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.3532802306487636, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5839265501529569, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.49961369350177665, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6351045115684573, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5064127215831256, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6516332048338376, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5064127215831256, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6608872773156567, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5064127215831256, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6608872773156567, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.35319015092357736, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5822934956325967, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5273318485276001, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.3532802306487636, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5839265501529569, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.49885432872486163, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6436097366017006, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.347738559973399, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5870908284609656, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5064127215831256, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6516332048338376, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5212982931053122, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5212982931053122, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.3732103775315803, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.599753778627387, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.4647137781420131, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5064127215831256, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6516332048338376, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3855522725905196, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.587260566914102, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.4426623526629488, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.6368371029698285, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3666340989897011, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5782960278998768, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.32892676518285585, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5800761309604682, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.46550355389682435, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.646730632916617, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.35138749399652214, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5704306601285767, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4592978565863154, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3299895472527792, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.505943186361187, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3299895472527792, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.505943186361187, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.29456425448249246, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5691358329649412, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.29456425448249246, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5597065389350384, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.17112018824070996, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.45537231333074557, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.46550355389682435, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.646730632916617, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.17317111195623838, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.49608097529808043, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.15083364266523736, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4714472446464193, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.25376192011637994, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.47199515498282607, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.2679646241939333, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5416663840402111, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3571029859832319, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5866468420684132, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.26349889713915725, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.12514328743841557, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.34961836061490087, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.19650854773882592, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5134302167765095, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.518761522736185, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5205634208063233, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.518761522736185, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5287371836061487, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.518761522736185, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.518761522736185, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5199744506588604, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5199744506588604, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.4397936463531347, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5197498882205425, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5199744506588604, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.39814417587130846, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6510906999464993, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.19650854773882592, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5134302167765095, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.19650854773882592, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5134302167765095, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.1739898487873076, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5216901258730671, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.18960267898639677, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5147057623329436, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5199744506588604, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.48994178177127756, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.4916235564562672, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.5106109398471469, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7688046995197549, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.44778459441351737, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7043336945393497, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.32214112487007024, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7794716829174484, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5564992960428438, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.5129586382458503, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7857394056399366, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.44778459441351737, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7043336945393497, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.5129586382458503, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7857394056399366, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.44778459441351737, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7043336945393497, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.44778459441351737, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7043336945393497, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.48222455960294414, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7511716303980656, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.2078053264420889, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5406521594311308, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.19288600440785333, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.39234287919326716, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.5106109398471469, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7688046995197549, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.6725157402359803, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.8704716256581455, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.3494188591554153, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7072510223788713, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.4544489097785626, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.8155088724539601, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.27693481162988715, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6775019273554157, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.3012697212106696, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6712254595250785, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.39693478727333953, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.4824766987096576, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7665355959167616, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.6158161554766717, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.8568982835533138, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.6535194995338728, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.8909391457425937, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.6535194995338728, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.8909391457425937, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.5088645484558708, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.8116199676115453, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.5088645484558708, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.8116199676115453, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.6535194995338728, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.8909391457425937, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.6535194995338728, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.8909391457425937, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.6535194995338728, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.8909391457425937, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.6535194995338728, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.8909391457425937, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.5088645484558708, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.8116199676115453, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.6158161554766717, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.8568982835533138, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.5088645484558708, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.8116199676115453, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.5088645484558708, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.8116199676115453, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.5088645484558708, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.8116199676115453, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.4062749424452353, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.7717578180410056, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.5088645484558708, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.8116199676115453, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.5088645484558708, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.8116199676115453, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.9390825256401198, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.4373266725468241, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.7265900332348232, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6810920170253699, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.8151678595510182, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.8873630455888943, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.8571061116877262, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.906026511295714, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.8151678595510182, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.8873630455888943, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.5386933265263314, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6707532211471023, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.5852187596735429, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7147018027438421, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.7298378378464025, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.8542310686470678, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.5852187596735429, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7147018027438421, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.6317498622799441, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.8119634168566612, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.6317498622799441, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.8119634168566612, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.5852187596735429, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7147018027438421, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.39569555015790975, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.654677791684296, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.2615311775021803, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5846059222883585, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.7483293841345244, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.8146526693270999, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.8151678595510182, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.8873630455888943, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.7483293841345244, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.8146526693270999, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.11392322187442314, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.4512595975949005, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6268498243626915, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.5852187596735429, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7147018027438421, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.39882161697649804, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.5852187596735429, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7147018027438421, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.31684822717918226, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.4984008175596484, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.31684822717918226, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.41779931059703573, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.31684822717918226, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.4715336632468998, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.01834337391695103, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.3682241310101735, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5450474312451057, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.1540231640374204, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.36182698586213136, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.3682241310101735, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5606184355158915, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.30717471678915614, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.4836163769308175, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.30717471678915614, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.4836163769308175, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.2765950320972588, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.4432072463778114, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.36197244208226037, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3081498800129959, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.3682241310101735, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5450474312451057, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.3682241310101735, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5450474312451057, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.20248027846537173, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.435318130545113, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.3682241310101735, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5450474312451057, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.35620708925421163, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5293559991315759, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.32620243905074275, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.4654796791877148, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.17415784669090767, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3586870164339305, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.3682241310101735, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5606184355158915, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.25590356077469273, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.4786634069414725, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.41469320341466864, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.2371332024655201, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.4712410964015287, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.3762184183716275, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.24622924391353238, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.483120102899027, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.3733886407390251, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.38216426442206797, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.41469320341466864, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.41469320341466864, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.1447953615777742, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.3672186527241423, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.2615311775021803, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.5017688442812349, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.3398181362504403, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.25590356077469273, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.4786634069414725, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.2615311775021803, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.5017688442812349, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.18216041731535018, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.4452929993740753, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.15763647528667518, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.44489192252868437, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.35987632082503146, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.47242552761762285, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.44449614932648224, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.33514014117550306, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3832378601967276, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.1635655187775842, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.43041151847878933, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.2833653799320801, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.17493298656673703, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4597805060995162, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.15911783110981517, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.43917006704544925, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3162730677141066, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.1446920532195634, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.390638522705562, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.14153945350617025, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3789032029729825, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.29913853007133406, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.1892240568795935, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.413299350315557, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.016642483540319063, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.16012807922240752, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3670896699555711, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.16279348731624776, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3909559744576112, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.1523624986106406, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.48503882116076147, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3264336472812388, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.14054613281857953, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4001732894164143, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.17311878877656692, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3787369939968744, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.2369690015211867, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.315195724666321, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.5709867732881334, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.898461683304813, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.5009303657723724, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.7151387779856716, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.6721494116745023, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.82229642880017, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.435949382480739, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.7459582755815383, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.47207580389427084, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.8498299925939343, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.25970649707456084, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.679667737737626, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.33488655580657256, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.7293017619227443, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.3079852790634304, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.6441930748012042, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.37916211129533023, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.695594076304475, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.22848056414159593, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.46412553170787146, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.32716086252780224, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.3963747719907255, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.37192780061868996, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.6892896281611525, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.43781869404573154, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.7710721800807774, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.6289868866690355, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.818026118975927, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.6289868866690355, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.818026118975927, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.4071359919668265, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.769719633686151, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.2111187176080899, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.5500149009411459, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.25292225602558815, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.3639412530979476, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.6558757084256063, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5294365116477578, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4115919883568686, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4082143130164669, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5109063149611607, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4572935209207519, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4572935209207519, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4768283301747848, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5067583406451532, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5067583406451532, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4572935209207519, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5424577953561173, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.48832236072170526, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.45365562700803175, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5067583406451532, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4017552060255782, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4095191931337695, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4533179790364423, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.45365562700803175, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4039245319097338, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4477905900494404, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.47331152665045856, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.8148863881905419, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.20352458275127414, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.6347163227772256, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.37738482597205875, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.7950074174417372, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.14529647371369342, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.6260669205212257, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.2411108351008944, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.6937177458584591, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.2115210387013347, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.667519467729798, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.32685141385924577, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.70370764261192, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.4301256869954793, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.7706340593676131, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.2930010353512543, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.7440537717085414, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.24659038451613516, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.6742202736673089, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.23713320246552005, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.6907833610938203, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.2256490809237466, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5866569481329091, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.23633399735350868, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.6998524249051279, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.1475954394771886, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.6528001650752809, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4957546540557718, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.24434615132870352, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.2997691103544319, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.718671272125112, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5852108991184741, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.44374537633106365, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.1150950994149918, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.561451772736876, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3449754050963302, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5521970778133709, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.20304086026162935, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.42692494747660203, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.20304086026162935, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.42692494747660203, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.311238976401943, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.2175445226496326, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4621999154927885, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.20853111243606828, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4435193094920918, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.2175445226496326, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4621999154927885, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.14253167772862407, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3600387580548723, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.14253167772862407, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3600387580548723, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.20623370300159788, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4218885357875893, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.15987105932093174, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4326511028690849, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.15077961452172084, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3926830608744669, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.2175445226496326, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4621999154927885, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.22408126327351324, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4635198584730167, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.19416717060109628, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4004538610234101, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.15358735839457807, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3955148125267572, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.20853111243606828, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4435193094920918, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.22872196013470597, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5039380180064673, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.22562504461821242, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.31412494195165025, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.794834366062997, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7747058711066555, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.794834366062997, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7747058711066555, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.794834366062997, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7747058711066555, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.794834366062997, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7747058711066555, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.794834366062997, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7747058711066555, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.794834366062997, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7747058711066555, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.794834366062997, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7747058711066555, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.794834366062997, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7747058711066555, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.794834366062997, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7747058711066555, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.794834366062997, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7747058711066555, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.7973454774914206, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7689312469180218, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4831233610237384, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6532633714920698, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.794834366062997, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7747058711066555, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.794834366062997, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7747058711066555, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4867643691804454, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.64607619022599, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.794834366062997, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7747058711066555, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.794834366062997, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7747058711066555, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2814017327307182, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.496330576799077, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.794834366062997, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7747058711066555, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.13421952443691573, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.1672261435520376, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.21470005952317833, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.3190887215348668, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.27232975940069765, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.2618225775948002, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.3154042850363793, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.6941268297866866, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.7232925116612475, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.28688236146427454, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.30588462336040045, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.24388575959910294, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.24388575959910294, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.24123834935482885, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.30588462336040045, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.1715463764333805, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.17571089085620334, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.22262500195341844, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.1887792103487876, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.2176061023319429, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.29799812108040613, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.2920370029693632, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.4216533376172454, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.4039571839139068, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.4182495832421204, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.41184258626249504, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.19230188007838597, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.24907067260370286, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.24645013697131232, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.2775164476006572, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.23587293236782791, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.22109333005198922, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.165099903439123, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.37585464732579915, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.36761798002928614, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.028350234434630903, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.3231493408508545, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.3101339436487065, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.5951888718948775, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.519769605051086, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.1850334785266634, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.636855658754492, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.598421012757662, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.636855658754492, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.598421012757662, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.42690137964167635, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.44383714733988183, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.21712457973411817, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.31211920341047517, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.29799410917087965, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.49831162551286645, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.37054866173134826, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.1988908085691084, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.3279525547808158, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.06999105251612557, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.15760436972905018, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.4578730121271583, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.4763409764863904, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.3210886805532631, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.35007227309343614, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.3324024618080297, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.5526778778205866, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.7267402732246275, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.4779289657345161, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.6131584716544094, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.503423182754679, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.7185797049975355, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.503423182754679, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.700643793254282, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.3685289119518548, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5839727029732353, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.32594974619751477, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5361018684083835, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.3190887215348668, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.4863428828999999, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.503423182754679, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.700643793254282, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.503423182754679, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.700643793254282, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.35345481163747966, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.600086974472641, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.3626305461419687, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.593978460413095, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.36227557436010244, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5653237160901986, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.5526778778205866, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.7267402732246275, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.3626305461419687, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5835071609276217, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.3685289119518548, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5839727029732353, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.07120149575853292, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.3048128700747796, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.503423182754679, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.700643793254282, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.2147607499133801, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5078156364964329, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.33403925633579773, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5319774706581363, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.3626305461419687, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.6179355732290436, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.4721001028256397, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.7096678127067526, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.4336560555138203, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.8096970872393391, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.7016116562610203, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.9349183533264898, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.6298777848185599, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.8784032559879915, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.5642137945405272, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.8202028423242187, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.5642137945405272, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.8202028423242187, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.6298777848185599, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.8784032559879915, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2999242985334114, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6283146443097338, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2999242985334114, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6283146443097338, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.5214131582612217, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.7585614280322228, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.5464711341087805, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.7864029396663078, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.47022156411316, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.7153092713985418, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.5582220191559627, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.8140951454638551, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.5582220191559627, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.8140951454638551, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.5649285678830409, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.8161130615280334, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.5577288803545524, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.7974046579591895, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.5577288803545524, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.7974046579591895, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.5988044702245784, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.7681690981335013, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.3713830979377925, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.5751167640035276, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.8407226006325472, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.13072010980679707, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.3105434451119841, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.11967758950157248, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.2800855102887508, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.16417215667010873, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.36947928854089845, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.12743800102943376, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.2780344256680002, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.2935328201198795, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.43434504242447536, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.10403896883688996, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.311533423157872, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.285255290869053, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.40782828620425265, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.19629722764755747, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.320182335345892, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.19629722764755747, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.320182335345892, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.2782546338934227, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.1260969365429506, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.2687407910300558, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.16458036334354648, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.19568336427344607, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.3723430986912162, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.14724619050922358, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.2839286654333982, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.27486902500051297, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.2283070457730447, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.3919468075768785, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.167045425494737, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.34131323228943095, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.13382613080002836, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.30519034852833676, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.14694567648694842, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.08541575654695113, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.21856618331280497, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.37453249276032596, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.13471766853689124, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.5276176357172093, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.15875722180934987, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.5368911830688495, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.13817790393734294, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.5050266837640857, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.13566979610140004, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.4650537985480846, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.39489495287005755, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.2320305803246989, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.5875457251146547, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.3678659779998934, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.3678659779998934, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.39584294700165096, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.3593318822560319, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.3064223067887308, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.16564565321939206, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.4536039871760293, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.2476165058078653, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.5724037747915761, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.29456425448249246, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.603721366747684, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.391672963267396, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.12409597120849801, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.46250911995239685, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.38466593078622596, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.4113262406569788, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.12259149958656422, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.4797495741142639, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.3199526749905591, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.46625097311775787, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.43437903147819623, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.2970991781513299, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.6304107643405119, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.14527620275146488, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.3233041457926855, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.37447417181493453, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.6453688418182368, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.23749937482512903, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.5186241423639549, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.2293648308471323, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.4184099073786108, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.12982679446701692, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.3598851490125971, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.12982679446701692, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.3598851490125971, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.2583858837827738, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.4449030747393809, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.15184543565627642, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.3316482535817426, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.15034744819187046, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.3379957017327007, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.6007525446699544, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.7052132029482998, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.491888791452497, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.6514476893155858, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.4640531756865941, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.6384698340031734, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.15016650105868437, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.38273138700440895, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.26822974402116034, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.5226016850517986, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.13064622864882905, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.29841237721609765, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.19685847267728884, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.12913533075470382, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.3699122403233082, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4012179884342934, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.1751062735415077, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.48664097065107126, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.1751062735415077, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.48664097065107126, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.1472462377094902, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4296561364586918, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.40354716807491975, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5496664962676806, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.2557914670300308, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.49203564396422345, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.37595662994657586, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5278838809443244, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3764925637801421, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.598648554589699, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3764925637801421, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.598648554589699, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.2460137257692754, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5651300593558061, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4252443015971012, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4389754417832011, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.21413989011971157, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5141525252714602, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.5162619434676526, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.7117190161792883, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3964543810570382, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.1927369155899176, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5652312627454582, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.22878684333741806, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5910527597265157, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.39376010611714934, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.11165817735905989, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.459384056941606, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.61445237198357, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.4111336169005197, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.654151133443915, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.3911104256806521, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.7162899586182385, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.3044269773076113, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5765179983591493, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.17770114738246548, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5263352658491535, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.4447630178181816, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5830457051905695, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.3481790920114991, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5546274930344433, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.5129484309843931, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.6268186944844665, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.3977135991359948, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.12991916506579942, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.442586106514705, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.3152093058791381, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5618098981717927, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.35630548449868954, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.6285749485464279, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.16618655166630525, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.4875519643561779, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.25567957494892185, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.4986688668174522, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.2278350766012413, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.4991401820950425, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.27241282355631563, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.44521047328947694, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.043407736034766595, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.26016211605713957, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.4390960897971484, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.6485099247319526, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.15511550090520096, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.4014314464220935, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.2943579102986884, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.1890425467840326, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.49920100551781216, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.13566979610140004, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.38699631006193164, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.1434533437144611, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.4325933876557919, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.3182117086067523, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.4064888217189732, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.21800193956058223, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.4699679052795741, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.13308442527111022, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.4188741469277716, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.23114663823833642, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.4894179053435348, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.16299446731288944, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.32080086594496104, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.22325877055095214, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.4093357710583615, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.11856660123276004, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.27382044275399503, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.06534878510818803, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.28228674987841224, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.0117896747628835, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.15448759309616733, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.42469356957489807, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.40517879381475214, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.38553652887288764, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.02629161215521893, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.2325340467951448, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.21223633441554032, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.43890529347423884, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.314919081947931, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.028206246615527588, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.3621071214396606, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.30702571862234085, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5270937279585078, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.32282138800401855, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5545363548814882, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.32282138800401855, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5545363548814882, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.32282138800401855, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5545363548814882, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.32282138800401855, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5545363548814882, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.37027304017004864, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.649647473010403, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.32282138800401855, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5545363548814882, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.32282138800401855, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5545363548814882, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.32282138800401855, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5545363548814882, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.37027304017004864, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.649647473010403, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.32282138800401855, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5545363548814882, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.37027304017004864, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.649647473010403, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.32282138800401855, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5545363548814882, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.32282138800401855, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5545363548814882, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.32282138800401855, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5545363548814882, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.32282138800401855, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5545363548814882, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.32282138800401855, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5545363548814882, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.2810335395782485, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5956686674268991, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.37027304017004864, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.649647473010403, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.32282138800401855, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5705123444441751, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.28615556452106294, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.46306995992822714, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.2291421308389693, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.4574939601395284, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.2850782270836371, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.48913589010736547, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.3208090524027114, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.2520126751139802, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.4174826402445743, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.40792735592133966, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.2520126751139802, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.4174826402445743, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.3172479410570454, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.3172479410570454, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.2255009728657898, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.2079988964349653, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.19034686962671285, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.28615556452106294, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.4803502934592869, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.2520126751139802, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.4174826402445743, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.40784251617180445, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.28615556452106294, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.4763638923491097, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.24248545140243574, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.43733358879787526, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.41303058168387535, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.2079988964349653, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.4508926953372328, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.2898497051727735, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.4696277526071703, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.4010889714538991, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.5073411865094402, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.19428231170553664, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.4516757290152384, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.2250265947708922, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.41994943513577565, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.36938092578764037, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.513005525044022, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.346982495809083, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.2286960004658595, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.4746698507981419, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.07439001436205736, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.17201012432847057, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.1196268811190815, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.13166139176601574, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.1629057627580272, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.36748173214650165, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.5111850166718516, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.24701991296369977, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.4634617667206513, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.5696705282375883, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.6515641575069311, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.318248515465579, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.5345496330406173, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.250737833894674, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.42608188471611363, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.29189371587790414, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.004111318884175239, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.2259609947599236, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.3263040636562357, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.5561195823338172, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.5362935676066722, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.5803515898273521, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.5422220468910552, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.4204739940979302, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.5173824078732066, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.4177866849157374, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.5243375045345786, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.4593860924266348, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.5045746404501606, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.328872375046221, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.39142733167976973, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3450219162509876, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3993348853061597, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.42670493571995677, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.42238844134044595, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.42670493571995677, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.42238844134044595, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.30978068501889056, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.28295274449167956, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.29222904325820687, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2723003396696643, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.29222904325820687, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2723003396696643, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3567464687150701, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3651755892066728, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.2548454457146799, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2714025628458789, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.33498389276277546, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3296536654279081, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.2368693821608258, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.320909989176825, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3942831699210502, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.47635688895853756, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3031148374457066, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.28706898608824516, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3223833286593516, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.39475158383309167, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.46146548771819573, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.4019452398054806, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.33438299066966715, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5409759573191787, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.37854068916316835, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5743796566387722, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.37854068916316835, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5768306472334509, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.32134504358579785, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5076725973953424, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.30759515980677804, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4988668235835214, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.29300728994404895, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4620516485185446, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2562150245540302, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.47046477830594896, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3545085634416642, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5387990402562496, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3545085634416642, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5387990402562496, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.27182849679730653, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5497265770945076, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.12001885809269497, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4122127192166065, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0956774578033165, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.2538064061386275, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4544505188404086, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6033006987520517, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3509258729305825, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5379703355059909, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3509258729305825, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5379703355059909, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.38936263771250235, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5443518219250745, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2767438561951455, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.45932437705906165, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.38936263771250235, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5468468646357602, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.28555753499459907, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3416445560351976, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4049402235047407, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5871644977560334, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.45506803308128024, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6477506541284608, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.592313615748771, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7382416555842614, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.592313615748771, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7568286018427376, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.5521710658453207, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7317828775912516, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.5521710658453207, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7317828775912516, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.5521710658453207, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7317828775912516, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.5521710658453207, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7317828775912516, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.5521710658453207, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7389181446831377, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.5521710658453207, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7389181446831377, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.5521710658453207, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7317828775912516, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4226108216696222, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6493613256786125, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.5521710658453207, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7317828775912516, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.5521710658453207, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7317828775912516, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.5521710658453207, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7317828775912516, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4226108216696222, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6331414171574684, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4226108216696222, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6331414171574684, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.5521710658453207, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7317828775912516, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4226108216696222, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6331414171574684, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.602867050301643, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7367363357155757, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.5521710658453207, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7317828775912516, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4018202851356865, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6003256951549871, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.580451128369423, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.7542976177437886, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4018202851356865, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6003256951549871, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.526357446896968, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5582486914071635, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6678493404097802, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5582486914071635, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6678493404097802, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4018202851356865, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6003256951549871, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.8406763479345174, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.8871517153603867, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.8406763479345174, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.8871517153603867, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.6666823117022298, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.7571125338649978, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.6977240390484037, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.818984467219358, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.010191247541026162, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5406438522344627, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6520694800788391, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4018202851356865, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6003256951549871, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.3864572432237816, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5849342936087653, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.6977240390484037, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.818984467219358, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5793367580502561, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.7675449146326079, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.48470790736249264, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6505709783584183, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.7019499719108448, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.8450280883390384, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.7397087417978795, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.8865031414920428, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.37717457428685847, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5554130492458337, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.31598923484911084, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.18953162992336403, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.45876745950873354, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2383770504614087, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.466645869611307, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.25567957494892185, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.456155589788991, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.20422838465921236, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3821382671218279, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.18207052811092134, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4504432021668592, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.209100828813675, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4538036435159857, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.209100828813675, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4538036435159857, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.14291173574075158, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.45184360988354105, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.22059202034059508, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5084868416945086, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.10087588344601466, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2599542517888651, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.49098929416640624, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3099627272480552, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5091964648046775, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.34419514726440925, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2128497674847141, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.438591227628555, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.23832340744793254, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.43652543518564646, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2066736214472914, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4458626950255153, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.16170596160446446, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.27743662258385243, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.21081851067789198, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4042801758173556, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.7629273292796576, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8510385544954956, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.8725129388059689, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.9495292423959529, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.8725129388059689, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.90941532255964, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.8725129388059689, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.9495292423959529, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6425503166524515, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8078891929749037, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6425503166524515, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8078891929749037, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.7181417752544185, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8029162546307379, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.7181417752544185, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8029162546307379, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6425503166524515, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8078891929749037, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5970228993860437, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7623592429154771, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5970228993860437, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7623592429154771, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.7629273292796576, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8510385544954956, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6425503166524515, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8078891929749037, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6888074582865503, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8055061207769505, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.8725129388059689, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.9495292423959529, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.8725129388059689, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.9495292423959529, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5970228993860437, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7623592429154771, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6978429290017016, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7717858931341154, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.49349163706233623, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.694445271037971, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3037643089519314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5183662698462751, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.18376711147874328, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3981272326046884, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.340960560695735, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5217663812589132, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.29898984447859595, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.535975072921985, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.18189587992135597, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.43964080400724653, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.2147607499133801, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3976144917079093, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.2745762486209681, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.43603011963088695, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.2745762486209681, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.43603011963088695, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3941175366175992, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5506555496793699, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.36067814893280986, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5071206995409688, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.1896550847075289, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4310936955011834, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.6495308560002527, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.7114963534919011, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.40589517638127065, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6149515961727072, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3339087646492816, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5995623358499859, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4122974402951816, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6170911690364487, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.32672940262046324, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5803377043012041, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3785572829193946, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.2643854378698732, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4066689638009577, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5267604642487788, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.8253498772794055, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.8529564805429163, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.7944837206494969, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.8784531740275225, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.7944837206494969, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.8784531740275225, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6941268297866866, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7679844670813416, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.8253498772794055, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.8529564805429163, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.7944837206494969, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.8784531740275225, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.7072172847953276, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7914639887327892, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.7072172847953276, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7914639887327892, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.7944837206494969, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.8784531740275225, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.175538121835486, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.44197441533246407, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.5828833474188783, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.7908226509294533, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.38694317759010316, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5953878513137957, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.2620499195763038, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.48937240022909234, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.22128776529156546, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4999323991212311, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.4272870063962341, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.6271734609265041, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.26332019392396333, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5893220054460814, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.23887527917609022, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5924993690004501, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.5394044743801475, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.7201188407944494, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.5394044743801475, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.7201188407944494, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.2745762486209681, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5730023382770898, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.2961516536011624, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5798430117187255, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.14101576878340707, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.35998495164287625, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.4161791450287817, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.6046852394200818, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.28089241320678904, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5475272889495614, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3070898761263382, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5791648909423264, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.31600229153053044, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5374439094267343, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.21690365808279138, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5019141380515013, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.009494440414752564, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.27733310601709266, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.4703077247331959, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.7203673717155472, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5040673596100225, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6469962279041276, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.22816849039973935, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.49849908693271183, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5106109398471469, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6635467152827634, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.325909498033977, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5501364764829885, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.7498810286408993, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.7726337964681356, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.4369942407063455, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6009573115008555, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3009687072297843, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5341810386314462, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3009687072297843, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.533217285012153, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3009687072297843, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.533217285012153, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3140382293917749, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5631437828635808, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.22816849039973935, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.49849908693271183, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3140382293917749, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5631437828635808, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.7498810286408993, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.7726337964681356, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.7498810286408993, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.7726337964681356, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.7498810286408993, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.7726337964681356, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.4167743222652789, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.4167743222652789, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.8318180062062374, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.8641903026295611, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.4806216298219478, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6311658995293531, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.7186969683828063, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.8143071707828088, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.395494817172382, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.48210216762305635, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5029543425204815, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.22543108408457457, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5721164465661742, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.4884391791679525, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7050595167501005, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.15495096883986592, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.45421263258392414, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5138104164912963, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.5123350305765596, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7719555466081361, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.5123350305765596, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.7719555466081361, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.42099734580654347, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.4355615888320689, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.3548263685679835, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.24518299917312847, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5623282316325473, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.346133586314057, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6386405249336979, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.4773779562574767, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.22119423000583918, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5855963149167847, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.2009685362442462, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5922962229661707, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.19514193023219964, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5096213500210006, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.2772639581765057, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.5194247346787363, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6585810035136251, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.16111212240349498, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5162765195160328, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.23109536367862135, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5693079918450474, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5052082359105701, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.17991078645928837, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5159811845433955, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.35504200505176187, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5693079918450474, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.35504200505176187, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5693079918450474, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.1794560313432444, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5236301264596329, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.1794560313432444, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5236301264596329, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.17991078645928837, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5159811845433955, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.45963072970927465, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.17991078645928837, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5159811845433955, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.2423268307137331, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.1794560313432444, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5236301264596329, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.35504200505176187, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5693079918450474, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.45963072970927465, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.35504200505176187, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5693079918450474, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5052082359105701, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.17991078645928837, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5159811845433955, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.4492950042617377, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.4608738248525917, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.4252891537802403, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6269243845872724, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.6486932415130529, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.788686710424071, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.49831162551286645, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6906494695103921, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.6486932415130529, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7887116805325072, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.6486932415130529, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.788686710424071, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.6325987025085013, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7966336957924106, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.6486932415130529, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.788686710424071, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.6325987025085013, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7966336957924106, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.6325987025085013, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7966336957924106, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.5694299147290928, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7161974280320248, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.4601221424835303, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.670463478293434, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.29042767355630905, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5945109731611824, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.6486932415130529, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.788686710424071, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.6486932415130529, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.788686710424071, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.49831162551286645, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6906247423308508, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.6486932415130529, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.788686710424071, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.6486932415130529, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.788686710424071, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.5694299147290928, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7161974280320248, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.2624310277292268, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6025429011085721, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.6486932415130529, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7887116805325072, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.7490853969372642, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.7869453805471358, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.4970449067437269, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5549084692917513, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.47136688868251947, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.532838700147956, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.6158362062506663, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.667219143613408, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.39057277619092257, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3934016321632531, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.43937095446369234, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.46181721677136944, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.47136688868251947, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.4875567860488344, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.47136688868251947, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.4875567860488344, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.3246935344198473, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.4118595729651108, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.3754749996604944, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.4281102173188986, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.29518557226324804, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.4165838256619938, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.7490853969372642, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.8062091543413888, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.44055594594452946, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5608379078395163, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.4462203715133425, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5604772871598175, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.7490853969372642, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.8062091543413888, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.7490853969372642, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.8062091543413888, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.5196976495259397, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.5405459272129465, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.5054091115759235, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.6683122485502007, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.7378351342269067, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.7822638455166255, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.46463179926223586, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.6658302112115018, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.30934066139296057, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.45568578346907496, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.2982935802892967, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.6025685350602339, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.4546828804875388, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.5363479231731832, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.3917244017992322, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.6342898219503306, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.42406493223220554, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.6515479761083638, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.3460178707517162, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.4777182579951077, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.22004975188435827, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.3580799708721354, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.22004975188435827, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.3580799708721354, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.2331039707351165, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.4183718943912669, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.27131642727612953, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.6108644122399178, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.2028609724458554, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.3641531893505486, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.4772349067014227, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.6979552346144655, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.32144812240790727, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.5957892991708469, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.4041996150245149, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.5081120456623677, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.33710215617303335, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.49211592362146866, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.2206635048675199, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.3982161812429023, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.4220575161811247, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.567941545813889, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.2411874285895162, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.7000942696222402, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.7771546905544982, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.32375968335328725, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4184987692475953, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.26583802173257376, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5022872142270257, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2928787572874777, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.46379385574395665, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5728145662417075, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7040427611843448, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6169034543248991, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6942951271142352, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6054657750562688, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7275033089428229, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.41103851467561064, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5051296804607229, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2989834108541526, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3436978703286461, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2989834108541526, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3436978703286461, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4290255653710564, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.559834009444831, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3910016226314522, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5825568684453398, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.16917954480308722, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.24861820310138238, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.45359393336810816, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6211966673115089, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.24876675536871823, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5735144784700121, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3042060837668054, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5646760672180815, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.05909067563124427, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.29914758135590824, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4458819273326583, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.63263087218045, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.19378256970654398, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.37585899878773465, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3626305461419687, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5172467980765818, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.33573064840973227, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.6896084087753593, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.3294509143325626, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.5502318837669161, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.2589451141492935, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.522870128470283, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.47843809609003357, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.6639502807777604, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.3975205618000645, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.7019684122171391, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.4093632136736518, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.7444177757069163, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.3680806213583401, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.7106717690083967, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.23731319709526777, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.56616019140197, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.23731319709526777, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.56616019140197, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.20383958833634852, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.12854865542367813, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.29041306954676516, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.11865659854763386, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.29336156127730795, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.6879419393521872, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.9251514068996235, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.6248651455191909, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.8644374396148615, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.49723587033674593, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.7738363285778612, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.6508430035585083, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.8046848309301634, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.6879419393521872, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.9251514068996235, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.11930191477839873, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.45709744918566686, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.14072301632107687, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.4881069342294986, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.6102624546684577, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.7455382911279661, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.5129914365244811, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6719058099803666, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.5129914365244811, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6719058099803666, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.5119999466045957, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6745099802934275, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.49494318987722147, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6525478644418932, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.6102624546684577, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.7455382911279661, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.6102624546684577, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.7455382911279661, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.49494318987722147, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6525478644418932, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.49494318987722147, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6525478644418932, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3961867597457338, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5900232242858499, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.28756999622055585, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.562349006774183, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4717991357336539, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6368175528583246, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.6102624546684577, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.7455382911279661, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.6102624546684577, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.7455382911279661, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.6584518248758039, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.78947510218162, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.5936781016890555, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.726845452474233, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.5129914365244811, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6719058099803666, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3911295456386654, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5800089719996314, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.43762500187316666, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6229933226678195, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3961867597457338, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5900232242858499, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.18558816000270506, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5082547077193023, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.25104615680952314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5386684376521024, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.23592677325793138, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.47227033885649483, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.22872196013470597, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4750367449212949, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.14239015212277228, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5183430108833335, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.12607162121844206, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4374864219311677, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.42219590199110324, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.40897782965894874, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.40897782965894874, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.20185773580773803, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.466274292216381, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.2801260476496127, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4918406628443769, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.51375768621103, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.24447640553662225, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5297512650421087, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.2698809072033866, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5827120908586206, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.1180263199418922, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.48888367066057226, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.22908101428437017, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5037331050598365, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.25100150428977114, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5078973712791223, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.34872397046357567, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.3077112060330384, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.18555515022041807, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5078671061112421, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.4158130624959958, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5675993368017754, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.034234536820051814, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3041065177261626, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5406223711214764, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.16020720994064927, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4494779997964713, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.15736937921428037, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4141688451038981, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.169398499987232, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4808428623530613, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.35573896339264094, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.43373844074577966, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3830947997613699, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3830947997613699, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.44383351844821595, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5658578761464655, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.2612246288823396, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.46227338852099076, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.36482717719811386, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.6036552405597372, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3040052734530821, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5563245065214154, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.47581040383390794, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.6192989319465453, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.40025074540692385, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5277651665395992, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.31823566221963034, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5315680645260001, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.31823566221963034, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5315680645260001, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.24014973510773413, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4758492311424834, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.25134915147946046, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4837504342576208, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.333388748608888, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.38969673850929676, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.391080327529236, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6466158388142541, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4464617303464354, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6719518780034501, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2291421308389693, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5523264448305787, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5718458251358622, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7377760376723715, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2291421308389693, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5523264448305787, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4290255653710564, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6542302592826403, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5438653754915956, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7302649909837065, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5438653754915956, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7302649909837065, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5438653754915956, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7302649909837065, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.48381346304516887, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5124459840529199, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.18188243375551016, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6612116051683129, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7760591039106626, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5438653754915956, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7302649909837065, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3060368950930091, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4831305031819278, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.32178169209299745, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4853985554034008, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.42601467364417966, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6715816583904739, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2615311775021803, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6168046788492375, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5152958249888121, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5435363281207461, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.3964023716675737, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.3718976854103865, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.3063889665164676, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.3060606492644399, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.4392283615599068, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.44496960530746443, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.42262353460370816, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.39118612205954695, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.25609516769327906, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.281538474327709, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.2875583820017638, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.3019039432257823, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.47901455811287486, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.42929076670455985, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.3226647471194494, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.3327820939566751, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.32214112487007024, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.33046168816283755, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.251417351368381, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.28946399423161434, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.4546697236991713, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.4460520024887841, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.2566290206865396, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.28445804097491273, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.3179303998422513, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.3849979751929991, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.15911783110981517, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.164451438302013, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.1931328231343031, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.5426552583629827, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.5035808316842799, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.3968302282220472, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.3941633568645063, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.45928067977039955, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.40650204723524674, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.3702794920873997, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.3917352141397069, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.47901455811287486, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.4956337510935212, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.21305956475594143, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.4395285386678049, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.5439405004075281, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.2348836443827469, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.3379049312559404, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.042135153740247816, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.2348836443827469, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.3379049312559404, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.1423071532720465, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.24796510465152288, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.1714049169603588, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.22748922852371126, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.5216690820512558, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.6663479473341912, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.5216690820512558, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.6663479473341912, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.3461876785833229, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.4375133561627813, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2674342279940095, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.33258450965972425, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.451818443605239, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.25270455578796175, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.29757523241220163, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.3292010361291119, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.4311130504810004, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.18239350853723665, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2213631608633465, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.23477883426250248, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.3032766573571171, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.5102002548573253, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.5481850559415788, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.17148143565512755, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.26771555499834426, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.1832567180568652, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.27347298550058124, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.1709686260975486, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.22586346179982364, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.5194247346787363, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.7440660671846263, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.4093184131170722, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.6844082266550039, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.4093184131170722, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.6844082266550039, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.4093184131170722, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.6844082266550039, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.39022736644855677, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.6827449888069321, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.5194247346787363, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.7440660671846263, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.39022736644855677, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.6827449888069321, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.5194247346787363, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.7440660671846263, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.5194247346787363, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.7440660671846263, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.5194247346787363, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.7440660671846263, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.5194247346787363, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.7440660671846263, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.43037677381220035, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.6909233597423888, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.5194247346787363, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.7440660671846263, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.39022736644855677, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.6827449888069321, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.4884391791679525, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.706162766244523, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.5194247346787363, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.7440660671846263, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.5194247346787363, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.7440660671846263, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.4093184131170722, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.6844082266550039, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.33094680953828387, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5082966623697348, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.5194247346787363, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.7440660671846263, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.4696150968541076, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.22656720908801994, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.631245192197625, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.18992435055904786, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5008843990379334, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5116579033814616, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.21535397121201075, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5925940104599001, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.29108736587772466, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6461260377075316, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.24268235789067255, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6128667394032248, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.33249685664456036, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.637090236976522, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.33249685664456036, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.637090236976522, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.43083309595945174, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6660840730280997, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.5051836989392832, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.7047307770140406, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.16616411230693814, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.45262530701514303, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.30205846864298064, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6431625523291072, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.30147856626075187, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6165177250649315, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.28685780777240977, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5505861134074188, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.28630708814903527, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5730343144709868, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2026478723972883, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.542836211081738, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.34657561901567957, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.3185890261553478, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.17679588126795498, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5270697005877061, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.2882258334128601, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.4424981883607872, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.2882258334128601, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.4424981883607872, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.2882258334128601, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.4424981883607872, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.2882258334128601, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.4458584061535501, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.16973628531343277, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.33812107130297947, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.173167034608353, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.35485513246564027, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.2434623104231637, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.44265412961841627, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.5245322414712715, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5814854113930948, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.5245322414712715, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5814854113930948, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.3489214645008508, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.44435817070504785, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.42657068031281004, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5676570548521612, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.34961722361745307, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5137611857380029, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.3654749251620566, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.4608305787020976, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.3131480143934657, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.30536941697562214, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.463676504477036, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.326982251281397, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.32823975228899477, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.27593241702034227, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.44400051895240894, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.30536941697562214, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.4724348641780438, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.2412588049712986, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.4536639990242445, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.21281701380712922, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.5028404616749624, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.15295559337528836, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.5217846954711376, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.20036529832558245, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.4411095116549528, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.23804003814061675, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.5256271784759383, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.3957831377655053, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.1447953615777742, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.42451849651929324, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.37257484093951504, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.5542570357348661, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.14153945350617025, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.4269729411131609, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.14153945350617025, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.4269729411131609, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.20472658408422542, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.48478811641801983, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.14167966116048497, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.40914138828349356, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.10344868765524623, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.2279301587823655, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.20664181816537017, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.5143883509283346, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.1143433820088083, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.35470781080011465, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.2676032275663791, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.5128827077378436, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.13737279171076758, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.41264185474099047, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.2042128370387497, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.4468235873099241, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.22354445094404488, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.45921484835729326, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.18082678642487518, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.27195592687669795, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.5350448626134884, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.4323312773819961, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.5997371746139766, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.49935841995256924, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.7295897846667867, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.6510651028008023, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.7734123056546561, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.3606966647394869, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.571396396984912, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.46227871261821646, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.6461151022797409, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.34592991251442967, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.5677282056948096, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.427700699409628, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.6058578812402458, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.22526021557690604, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.48234602030711826, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.22526021557690604, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.48234602030711826, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.6275541561404893, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.7316294988783622, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.3954642331526574, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.5607387096042565, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.344268011447701, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.5316720524252836, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.3943890898713652, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.5760332856685048, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.22288552654078472, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.48701612914140385, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.4070190836305988, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.5949904060350671, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.29049793709903843, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.5660059776713433, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.3534252810313016, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.6247144454333148, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.1925903910576669, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.45078285386741757, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.09941527806251362, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.3375800400832725, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.5942391703945106, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.7043046273012841, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.16373682488441257, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5249159229096783, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.29663479358211337, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5048860636204477, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3790331198369714, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5564797446076439, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3785063589496393, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.550653561798892, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.21798830133080024, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4809891449178095, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.2543786873577924, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.474821633299339, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.29622254568370254, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5178269067077038, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3845687306448411, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3845687306448411, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.29013347578303705, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5229103695975342, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.19166642992550154, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.45352405841136717, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.32978372103036435, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.29622254568370254, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4935676335226274, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3066439134043059, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.48319828737392106, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.2991528889155786, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5582453237847728, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.2139991547137325, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4401240520726166, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.19681703236998485, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4641799487191322, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.011704493972392318, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.24391450136812154, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4188399581018156, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.4784684190704374, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5220744129850714, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.454674783834345, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.2392702601822198, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.4504639375417262, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5107881984342949, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5397858290886074, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5616963280794934, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.2734404217216863, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.1014524997386625, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.2953987627094475, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.3946720316502292, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.4361039802749712, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5269731605293632, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.16170596160446446, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5772772631219125, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.1354715470706931, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.40840642737172866, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.16008724961429593, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.3979154668239613, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.19230188007838597, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5927843365311597, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.19230188007838597, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5927843365311597, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.12577829595095136, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.3358649233160436, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.39812118573723865, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.38327626440546453, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.43846037099743423, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.5878523089199363, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.3049156495560148, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.41208793236807006, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.3635609139463711, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.5425942881923044, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.3727355697846668, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.5366981850233046, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.5063945134578823, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.42656860819541537, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.24422057337699718, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.4745686548450405, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.28421103644630136, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.4520674720621938, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.3092676945859865, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.041718664057048, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.19774288940652704, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.002777777777777778, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.1754386006461776, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.47674964090612326, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.3619317454406273, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.5603087309111027, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.17101456688527578, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.31396874406510017, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.19313335507865945, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.5370585235328835, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.19337006497014184, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.6117839591567146, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.32161227721249164, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.4889481341529844, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.03999953381673504, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.14295717937701993, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.39647338707491836, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.43504038103657183, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.6606468296022548, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.3265516873506877, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5955775050987988, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.2392702601822198, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.45083730819525286, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.7071067811865476, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.8568176228429959, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.7071067811865476, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.8568176228429959, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.8843865924896842, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.9546910250050761, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.6396679416047654, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.8130954821598096, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.7071067811865476, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.8568176228429959, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.7071067811865476, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.8568176228429959, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.6396679416047654, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.8183519585847914, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.4302123694194221, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.6691232302814786, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.1736516517811053, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.7071067811865476, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.8568176228429959, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.44689472642345274, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.6588405895873823, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.5741156866422448, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.7600101685993667, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.12074172794466914, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.3854432348491077, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.7500901713271477, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.8624598551416763, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.3280740637235151, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.18975516698135844, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5467364890370301, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.8843865924896842, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.9546910250050761, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.26608125176434144, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.4802266386987784, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.2966090320349725, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.4660692265920593, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.18240162908464763, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.47352584565341405, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.2872797668292002, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.499989214794679, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.2872797668292002, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.537813488946275, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.27602580646531577, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.5170713393917188, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.25916413607200117, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.48625815319476634, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.18240162908464763, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.5287419988380048, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.18240162908464763, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.5287419988380048, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.2737111320203921, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.2698078272149417, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.1950703465484404, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.3491985989318535, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.6696429767392259, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.329340597116918, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.5811461544739382, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.2872797668292002, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.49898853929554654, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.034132793984978135, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.24125513696128442, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.27249745234058675, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.4904389092071885, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.31756072966857934, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.20658804691348367, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.30376137001310205, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.5498110282118828, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.19696533992407975, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.36270408031098256, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.4425219454479648, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.3847693175443825, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.35479105265934485, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.6164147380135121, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.35223865745748834, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.19569434861872417, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.5843699612664682, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.026164052754179955, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.026164052754179955, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.1955775564284596, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.018686237228756782, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.08008973610668292, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.3081206097311581, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.45267569576678357, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.2928298013714697, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.5433026792013316, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.31214672998964255, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.08222779739686721, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.3335295895031916, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.30052834075859, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.5716680280251661, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.04331732537669657, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.006530825496342739, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.22509207586959207, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.49023502313124495, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7638414724136195, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4424906782646928, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.705507971295129, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.41452787844405115, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6973605663974715, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.41032302768839235, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6634154486532953, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.638057576753081, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.8245673639562578, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.343041631179768, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6383283286161612, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.44711013370113256, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7319347493436125, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.0941071492118366, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.22362542790697484, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.1406005899217114, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.3205132145674765, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4275810014748856, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6481070648129139, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4424906782646928, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7213459154288302, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.28473824051926716, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5944159484469135, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5702655877666989, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.8232854345902009, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5627396123761852, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.768337575605251, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.46409619603227925, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7474126325188408, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.42803425515420807, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7260183442795153, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4361016861714566, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7150254604232192, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.23183344653730417, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.48766037123469547, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.23962966980870534, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5138361143222901, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.3597862823053843, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7212767938301806, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.07860105393900486, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.09678377693633947, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.11601141307045003, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.21671187566850864, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.2390076354901812, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.33570154125476054, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1371661844308428, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.23455679137513727, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1809155303398683, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3604670693910988, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.1775614884118737, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.21326369102393236, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.24781828193168487, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.07404009463634391, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.13528723418511077, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.08990845492237175, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.16528667366086555, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.12394460940540938, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.26662620996190534, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.09118352267841409, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.12596283258548654, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.10738497851825159, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.18430770214819148, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.30538115660133164, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.338864446519003, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.17287505598461764, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.24853382028634724, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.14891504773093184, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2618919111168516, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1406879778177777, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.24227488458492952, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.14399548544647026, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2528996799005126, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.14876927210374952, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2536059288500648, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.04114212836378985, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.1070604518443882, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.26411327741267115, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2898946819245943, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.35015224715252113, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5701648579139658, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3349252032650068, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5908087431574293, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3258812297722265, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5753985304712377, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2883113322808919, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5835478395499368, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.23475000928031314, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5032853946032531, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.11690741296020518, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.44984705715342654, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2577716972449781, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5171901208397282, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.16424155566323012, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.1781207588347241, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.04631732527976412, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.21558480215297515, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.006200396825396826, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.09350604622460418, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3430629209966263, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.25639784746935274, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5733913952323451, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2516849730052866, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5441146119043165, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.23287896954139942, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5168980964497457, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.24643585808835486, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5459613462641708, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2696398422206727, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5506563869475618, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.004578754578754577, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.004597701149425286, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.23325505861671614, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.503948422566616, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3449058130015412, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5365619830343804, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.29688845677442144, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5494319015457763, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3257602417321556, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5666596539835803, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.22511140285349446, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.553839023223762, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.30298632514306056, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5652374402593988, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.24479697566202357, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5213023098886357, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3274016883618531, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5570399656004248, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.11114096285893173, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.2723973389546046, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.1196425813555299, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.3031655363777367, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.2211880505010663, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.4573855767208229, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.1690653646485627, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.4758186156150245, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.2708105005235697, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.49508133913561864, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.45149688763848994, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6166500596693574, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3887923654575486, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6140755432780003, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.399477857457097, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5617218895807364, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3191349966700777, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5329750656706205, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.2931154738386615, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.538531089380228, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.16891395122331324, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.43166826352433957, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.09175663647957763, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.32499940569388225, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.162496560019558, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.4637542439867255, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.25376032254696296, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5334329403985332, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.20039141607873007, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.36123312088832493, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.0564437248458207, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.15026037463138217, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4303467795130825, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.32752310268966434, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6168686838238246, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.2529076741385625, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5430833248313275, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.207314191412716, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4360555836773355, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.2897852141814925, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.2603665007398762, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.08070632004040007, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.35911678207067443, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.25288574410850384, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5715569142855546, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.29537010291406884, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4221671351559825, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6302060108035411, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4545481795400536, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6054218377148394, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4398690431123469, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6046405925677363, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.24968557018529272, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5511430757077329, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.2864021858368971, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5777536673857124, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.07833614957377547, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.2978243697131122, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.036093834539820895, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.1939545119098376, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.48320036215224016, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.32707695373369694, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5166643606783462, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3069937936246452, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5024648105961349, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3575909322256676, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5409483829147745, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2228729825024992, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4466759653076362, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.26629603567533056, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5253549608514286, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.28261688976967947, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5075237416590358, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3235473265529593, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5441122251341168, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.04595467899356206, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.2598586520351841, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.16787356943023207, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.24761638648350678, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2781578586520005, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3796663901127053, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.46782343515163066, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.6422853411921757, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.26403796477530145, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4493814923987531, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3935019932504074, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.6071903247613194, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.49341196686542005, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.6590076259512049, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3288143137394372, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5596092732231619, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.4122335241726334, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.6323888082640657, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.4117440619621889, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5815106363300868, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.20576804883654082, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.37723120852370196, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.12858902882463447, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3148709023566568, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.17140863043800483, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.39681418211766745, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3927237741677927, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7451438087039315, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5570357635362685, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8116469942298856, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3508597296865219, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6833592152043626, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.21259470439331316, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5863866793721222, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.44340494695017685, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7587103444385384, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.38084051173962913, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7334510090568515, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.26513488970168847, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6029932145447834, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.05923625523450714, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.31569765163635033, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.12062355101812712, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.33811435144101354, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4432782054917686, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7181569025811343, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.33282724824658805, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6835909948825732, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2646692275116123, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5971551168243227, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6263164471220594, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.852013904460107, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4940317095722323, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7362898909098208, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5112867162620864, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7497537018148864, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4667782254569818, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7754094279644977, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4667782254569818, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7754094279644977, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2359102917931129, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.46234028881356193, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.10177931989613292, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3335479382455017, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3459167762620119, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7155724078484401, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3826576187198625, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6071841372061269, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3447241447679157, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5531085140985558, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3178743908080705, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5513949312034092, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.1883251048230039, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.45303225382772006, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.019298976936385798, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.2378706071654586, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.49630155585737695, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3860973950960897, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6271680934322363, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.12899691256037651, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.10255693412858163, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.24967756802190116, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.559682285505658, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3157197679655673, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6311766762942234, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.42165893179697905, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.36440851219076265, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6157376412237141, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3969398322556475, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6236448690964278, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.38048895490051765, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6349497388372479, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4324371049196428, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6305851137521162, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4396289179879654, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6487399259023146, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.09896281892480713, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.376192516493049, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.01937817581496422, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.1805414152287055, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4894585255537274, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.31128635710849173, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6304411194127884, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.13308561809919006, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5312476702183977, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.2579124920342433, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5545120254366757, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.1595487507830045, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.45111566089364774, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.23693055763743093, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5928508275621333, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.16510868745008767, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.4762714594756596, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.27710310401156996, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5216248191624099, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.059703448812578594, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.2883367116290346, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.04503583575448186, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.28569900523868325, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.303998162324503, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5931856951819833, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.21040286183905324, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5464826942840814, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.2493264171390761, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.46012273577134766, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.40696144066138723, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6476222098586478, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.3207863467056968, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6030950695494548, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.39631066492420963, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6670602127484115, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.2625805454451497, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5886806140244891, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.30857386418398147, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6255460002189321, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.23301500681900636, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.4732064514809435, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.06797010899515823, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.27154181329396565, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.19568007857684672, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5228407307909605, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.2879556779114461, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4554184077174173, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.09578921953028982, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.40472887922389433, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.22965669823067916, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.46247819390492995, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.08920952468433085, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.32241875701400735, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.06598288291076504, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3008263306617865, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.2059931729749887, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4491812480433597, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.20475739007221866, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3934874462686164, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.14091087674113076, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.129931470576492, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.2945993949560173, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.1719646079342664, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.308102700736633, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.2534555198498905, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4514808121244451, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.09278864176093975, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.29077212346360004, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.35174128537520233, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5232532816160403, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.29989984326599584, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.45201602124250284, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.11684343186914438, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.40293579310759836, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.28255079601170635, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4828223682720399, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.33089917159160825, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.49423984666314635, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.06881975883969664, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3087076022347511, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.2023651649328507, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.1689706894436884, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.32609144958957464, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.38729516708438194, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6571482446395243, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.32995628251235876, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5982616321404195, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.4381454708258676, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6163746220282033, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3614922712385951, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5698489012763526, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3957463068583885, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6093311944617019, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.2572958792096885, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5064333161464132, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.4730742700342366, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6716818492415609, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.06730826637437216, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.23959487558830286, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.1929800091551954, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.37705878363065914, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.41278042192714015, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6060858750149657, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.028718620586134988, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.06819568935771328, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.28398527281580743, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5353252373488673, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5839068685770862, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.7498987134962192, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.03231618598390566, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.018372817903957082, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.568128598260769, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.7443891530963911, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3915774240356112, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6438987110697019, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.5052922639977637, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.7104337168657513, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.39901140710995353, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5456268880813968, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.17827215716412181, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.31830622503514655, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3173241691310352, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5910464434099775, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.3665528144045068, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5966563047685359, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.2587297749908005, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5132023401682766, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.2914897522509679, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5714769597200869, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.11150937707712508, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.4170915413269471, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.12061207067450011, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.46638924840013024, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.11675747661776523, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.4301556378791635, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.20057225201358211, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.47576367606491715, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.1169017254476769, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.25243905051959625, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.07539975290981413, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.30092387998871956, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.1078517242048809, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.3886262536746606, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.09282999600999915, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.3602616614804148, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.08048862002869049, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.38839838380229685, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.30250421218183904, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5314198518425818, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.19779406326150695, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5032281717452601, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.2674907183014193, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5237435675958946, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.21024692077841572, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5403945194972577, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.1280398105600789, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5140283032612094, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.31309813582314194, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.026104354115338492, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.18449230121441001, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.40058346018376356, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.29894673648596126, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6322815922673689, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.18330256089173447, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5328391139635578, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3478966138007723, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.666271052510266, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.21697301406549346, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.4876777357531764, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.2907703616727677, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6671226464299455, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.29019765706301537, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.598596485843401, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3151295371556651, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6242597159052685, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.07145977539457268, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.30420200787580826, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.07487205965472855, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.3134735005064425, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.2672991324984635, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6474323586139361, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.28904496671735136, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6205783309843279, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.12521098911929102, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.2534584650774572, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3486251970868468, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6519074063738273, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.31280763439438314, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.605102269641409, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.5047460217572859, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6981561913726569, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.2622053872435742, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6045462235214704, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.25014180241386386, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5948201299027263, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.2258043389079604, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5033553487331558, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.09841955325773799, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.3327723902928814, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.2615651536220919, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5337436257798058, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.7416488036617811, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.9156308978596118, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.2924496936199556, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6040522123603048, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.35611859459201994, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6591871481895288, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.3092067388523221, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6104451101668408, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.543070234136599, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7506439740838399, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.46290680573161996, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.688869111662782, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.5295672450222603, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.715157413474444, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.07150786783833522, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.3509665568396921, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.2173044897357899, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.4571110459558132, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.33262718496001725, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6707552233208028, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.4196855486150348, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7433904631553542, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.2894921926769463, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6202383519071005, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.5995826896426277, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7943692036315023, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.6993946355159273, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.8854180853818128, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.6176355987862611, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7898758502538201, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.4475966481812816, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6848118022736988, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.4559665634342608, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6856895897445419, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.1488773345157941, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.3502460666032722, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.33625310520541907, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.4899679589833683, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6996203149315261, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.23119301671666287, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.354782287640505, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.20419333453691463, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.39470297247688435, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.14599223028360678, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.28718685195806315, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.0680779227699037, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.29461063296452894, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.41177002362103615, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.18386904980839383, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3263831403119468, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.2995728063785384, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.41824297302824903, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.14920276910823294, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.23372518880177304, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.15241326793273957, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.2373874392211583, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.1948950171081147, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3579044902117876, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.12792364540053502, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3042229576057812, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.2423936863423674, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3482875559347012, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.2409168844747761, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3760904447135035, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.19123206651963354, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3643868829217187, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.2409168844747761, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.34625648713313856, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.2646814749718951, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.38312949443875044, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.2919568084302786, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.4097202831960618, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.22076059779174925, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.00882086689569064, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.1753792879326568, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3388058023792196, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.26230609318615344, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.5822825437641462, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.3131514736435596, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.5852253917268062, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.303083208699035, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.5712741347584299, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.31961023654590437, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.624665235852261, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.033288372187425926, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.022279084478187435, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.06298574903914071, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.04937216027445676, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.2356808688936941, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.6055769871534641, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.07057336332240136, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.27794985614895684, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.05976469775528989, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.29483492026263836, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.2841435865247738, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.4668115397161167, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.31925439389264404, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.5392797996676849, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.29239744118171573, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.5000428320937891, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.3593473105345847, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.5947494543532806, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.3754906253392964, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.6588309503016917, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.34059688207022226, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.5628429966636888, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.41714597501205064, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.6924696604582061, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.30864757764602013, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.5998796997663499, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.06402718864310818, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.34695960525493563, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.033288372187425926, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.01992327449241025, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.21510544667604684, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.4846454728121982, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.22551665711721375, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5279805413050443, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.1565711240691367, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5086519720619103, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.1258220555845937, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.47239582679683584, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.1413598390347116, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4830716882630716, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.03519816815769828, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.015973750745917652, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.15991187032000503, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4667629327672802, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.34259841795512924, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.585466128440694, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.042741233133146164, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.1133793152496745, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.1497426195025022, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3726183841781829, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.11435885039099025, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.38595569757978, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.11740365232394666, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3950281226485317, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.23969951086481833, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.49980090988685105, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.19984031923399143, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.48490100132152725, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2610554047890956, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.47455871891673146, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.1345609539980525, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4564753327718936, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2648638369801883, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.556024235512166, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.15781880138100113, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3627260973544584, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.03519816815769828, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.015973750745917652, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.04375436627586615, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.020262091842738417, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.08905591495817854, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.39258991796140724, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.16839298730242253, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.3815455050057558, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.22481285662586992, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.38610735170310867, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.07292639249248285, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.3325364976906501, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.17743299460161882, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.4899555753373575, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.19057582910221915, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.4718416669109882, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.1314770573188977, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.42247455964888564, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.09157767943752927, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.3701460681897939, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.06669809736714213, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.21891089447825662, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.10803064316196949, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.07765674844412951, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.13507002547130284, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.24864356194242332, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.2689259076280853, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.5085571523284772, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.2280292808597417, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.5045187042197178, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.07998996525280747, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.46694300232895003, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.20686631979541423, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.5414930563125206, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.21347288910540466, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.5213180608720375, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.3288154767854781, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0619525837309077, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.20810880139024002, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.16925466459550803, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.3398252842976541, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.34140765467102613, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6319466141042981, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.22592501932697184, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4846944833505256, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2928926658121417, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5528145631801176, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.09861322169661857, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3428977474554415, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.33311017108029045, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6152393361388527, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2647140242373933, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6038381731388569, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2058592651002273, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4968187716376065, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.1847269092785415, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.1847269092785415, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.30173859880787035, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5195706056213185, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.15937946569579547, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.45845453966638194, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.15736573842205165, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4353622002928028, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6713478366673378, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.286424022785037, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6108160899721901, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4743785204576228, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.7078409133478152, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3456425407114715, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6437361452928098, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.304577696074354, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6169603629801113, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3352630033761681, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.2848709355004181, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.18282472485667683, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5027337480128451, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.24572492027154266, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5262167453128139, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.1809849704111973, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5183477609966307, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.19464399422312637, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5198306336056712, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.19847761251706378, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4798124823955707, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.03599560515986058, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.016567810125787508, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.17500281336427004, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.44790575694816215, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.2682764485524619, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5066876769170529, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.1547981586771203, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.3205200486955593, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.26485738863969893, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.004773082147669651, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.08292855306993584, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.04879296088278628, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.18031240328973427, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.07730946223161274, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.3543827361424084, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.3583463145772598, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.597082260723824, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.30842328963154475, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5533832756338398, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.27287437521258534, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5680146925253824, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.2605463938154069, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5612230499845808, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.2920134076564045, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5696793890490214, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.09857802458552499, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.37805549635837926, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.23183643082360209, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.167234964196675, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4598224413569426, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.11543246428739175, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3867652322513648, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.1752510532254824, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.1096600595185198, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.16260441457252336, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.37196866466168216, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.296106081433828, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3348767210373086, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.21150430967143613, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.22520408495200187, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3055950800828411, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.10213889902359212, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.32878876614064423, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0854172444037865, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.2822077407025544, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.1471731939801332, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.35613397842834366, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.09550255684070111, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3229718971488525, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.15796681910573898, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.40727924114728786, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3956940134405236, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.10482054709664401, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3662621832712706, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.15232399499208898, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.22542170102122022, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.27278835848187016, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2574866101628968, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5192222793733214, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4165851519399544, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5626264040219835, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3279697678478715, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.523740736836828, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.20079352324782537, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5138627523560615, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.44702012563994353, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5730388206570669, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.26521877043131964, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.41000643148292765, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.38762928139075753, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5446552031353901, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.15363941244107066, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3041152734335267, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.15363941244107066, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3041152734335267, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.30335178070262864, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5110748651753049, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3043760556486702, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5831741446799831, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.11258390959554744, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.2499701253990355, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.41775329557815755, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.615048701615364, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.47252884345725904, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5979074937874786, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4843113591527644, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6440496751909527, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3836378227271174, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.576300320227504, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.39081172824475424, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5754293696780579, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.26933264604451856, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.487526438995296, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.13334708422802907, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.32038773971038526, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.42383919356215566, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5611383064918709, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.2062407699777119, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.1636538054009315, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.27030640495259156, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.2533935280076646, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.35639112162596603, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.20469187767870595, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.2562732501285193, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.2193938326426279, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.18968668542064138, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.07694262297493382, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.07694262297493382, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.20980831111140325, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.274654868986135, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.07303863538896588, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.22255642770836792, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.11368320018193068, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.304990814668614, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.26596587645638975, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.23235431918379582, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.1468128974935517, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.31601347678307357, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.28536974606932336, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.22448784282354864, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.10694329815809711, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.29703553525902243, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.2905178491524662, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.4540869803464816, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.4416037594311788, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.5926552552596459, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.4183954028764231, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.4906994718051845, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.014073100248363871, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.191952233722376, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.5821113312936593, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.6682049424481182, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.37974703287224004, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.4784059073150431, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.31172573638980083, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.45166421390889583, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.1878322123464407, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.29465564326686483, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.01445896300020725, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.329340597116918, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.45129224979788274, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.2576254437408645, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.4648884539724795, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.2532199664925789, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.4300207561067402, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.3901440609455783, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.44745023619328156, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.03256664244421472, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.3526878583155709, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.4468173561897173, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.16652678576294083, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.32423547140462744, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.21847844937497599, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.3923992785132143, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.23724958596921322, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.35823441753358387, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.03256664244421472, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.29698362304848486, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.46177552759731727, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.5441799557959847, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.7471708187115776, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.2737674171768533, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.49294682774777393, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.441371588380312, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.6477253956590645, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.2900657692103618, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5959886771662792, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.5465033074804745, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.7499074934704908, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.5021429409594009, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.6919014524613049, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.4286493815549817, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.608929454341427, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.07225602892167936, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.29878327654661274, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.07225602892167936, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.29878327654661274, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.4397080749292039, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.7141130990691512, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.3135342425916377, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.6210817766473049, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.25803866913759976, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.34620959646598154, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.5617489123817119, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.7727913932943925, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.40676766131178976, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.6956515361644989, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.6728450601744213, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.7864311690897225, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.3717155023854462, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.6551663797221938, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.5215099214730985, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.7532497199234878, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.32534541934969374, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.4626445734330468, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.13240628161243978, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.343946203604971, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.4577369372875184, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.7101415565054174, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.36092600834422417, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6496714079703786, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2322365386782331, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6044384701497908, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.3064251318019254, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6182391877177092, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.1651157059535634, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5582712048370183, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.22649932377430695, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6026783583939563, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2039057030112552, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.571035829197345, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.24691200113491704, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.594269344169987, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0911356222075961, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.30976821853610853, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.08737167851715875, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.30581414833940024, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.13652085900837302, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.4761370326369281, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.15474877339130935, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.4712355780453016, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.10952542777192922, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.2783128399737003, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.3042729202167935, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.631636824383677, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.4059763180954132, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6572798986279489, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.43275966453933684, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6405112835577158, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.3768478744790068, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.7090128776983943, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.30905953562470256, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6622367498379318, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.13149894617910776, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.37815587581761695, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0033887281254162717, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.04357500712552422, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2362700080032879, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5323731887249256, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.21605302662107204, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5282568801368064, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.26458538438609513, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5387766932929448, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.13324009269060547, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.43938753038448325, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.08360795151624724, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.35429954345961984, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.19635248523500579, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.48914706397531627, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.11014333487863136, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.41120665566191017, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.15435869219704992, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.44156603809589606, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.07222837169226046, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.25549328851755854, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.04402492652225698, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.24550650014690958, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.1378413097704262, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.4211572191546224, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.24870720433450894, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5735974400415592, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.013817287974302723, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.12536446978204066, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.3457963945572483, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5975923648401492, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.1444220103352306, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.48799831725044884, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.10621549048619741, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.431831433985273, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.1719308255245778, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.4713105639084985, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.1780606498214454, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5082580772086693, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.2914965018091027, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.034101561447741305, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.1264811292086546, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.19703715011933906, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.4326807387314957, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.07228800742761465, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.3524948213332762, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.3972317449044497, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.409648387810045, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.3316829223225589, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.0041614648356221385, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.024012200597804383, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.09757710283079672, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.41975185347384797, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.12115369824118394, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.12115369824118394, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0791188256545285, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.3180943786872059, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.01809539866686559, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.03594918688245323, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.27586328218418676, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.1113631734978524, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.3741979529200897, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.10259158366128554, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.4209327448560267, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.09528067873060077, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.3544690292289135, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.05038767655098835, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.3585941732179629, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.09230812022268482, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.43002721697522195, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0743936478408373, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.30237166895145057, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.0041614648356221385, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.06967699606840941, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.30067645885474037, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.3972301056942388, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.5673240077710876, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.27691593390925684, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.397254897780366, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.431440472360091, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.5867820725582551, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.33370269242604067, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.4403514506678736, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.49134520717123853, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.6018727487732392, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.43276672055758025, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.5477430910654226, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.4516989732837159, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.5415798785310697, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.0818695212523483, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.21389006966831828, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.07468025823787741, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.2295572628550584, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.27707852552725587, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.4017475844016888, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.39805634587063443, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.48269236653883857, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.09896266491372588, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.2200451105194662, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.5433298898255388, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.6539235920730772, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.5092920820600262, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.7015498636340426, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.4868801987813637, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.6780031796193685, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.4807632521130567, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.6004276937762728, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.4386676743039076, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.5540223698738236, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.016456805312687214, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.011389062176759937, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.11233810019914323, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.289567843960365, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.48640235222602146, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.6149323786242455, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.37566372103146395, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5970100838075844, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.37989456711162206, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5769558960935135, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.23324427100847284, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4221129348893475, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3115230560624543, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5400649415883527, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.019065204285618847, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.04824196983803427, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.29808592153996283, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5278034227464174, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.23667062365446973, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3073763958460875, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.19054540236783327, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4105353806160392, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.2360968429109492, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4627612219764735, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.146556809091633, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.38590997021162093, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3895672786514209, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6195204168074118, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3509814893708312, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5555382601570973, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.23953300557044352, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4748663920110332, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.2705411345269698, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6382727936402589, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.26165878484802635, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.597371720013285, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.08593880254067122, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.393760656889682, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.019065204285618847, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.5136268735913038, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.747250567370702, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.5269786261674732, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.7211839703961767, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.44704175546996683, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.6464230283099384, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.45718323554054785, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.6782668461340563, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.004290004290004289, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.024707645668534613, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.19943370880401756, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5118140585073162, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.01120180139519698, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.16357632783064235, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.018788314098527933, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.17662910365872764, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.3688451938640129, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5612443053901701, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.42119189271590934, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.6000905799312327, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.2192051105601272, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.474604589222231, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.5977584263760277, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.8063885917966144, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.6868497310757332, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.8147345977692317, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.42149530038712396, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.6765855139833816, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.47380853976119175, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.7354659271412988, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.501032405276633, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.6978436898290393, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.07039178869925088, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.3262556441298511, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.004290004290004289, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.22407318098839504, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.44990431651746765, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.18473000217570404, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.4760100031203425, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.18239336435292994, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.5029345390359463, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.11379273899572502, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.4217370584767284, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.09486793917477333, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.40615685666658385, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.22878603824610966, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.5140335091788867, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.10500646136500086, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.3190724849310781, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.09697472565363051, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.3745932362705387, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.04269647757530258, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.19536240230055701, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.16809999283143534, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.2738834400667501, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.06527413669170568, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.0691742922954427, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.48148276729321016, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.6931006881700985, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.2916733028854906, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.611629344099449, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.16688708659032603, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.4728485884522032, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.12970181521722748, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.4710457410977096, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.24709901811909443, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.4876426822294573, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.08767210132815903, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.38627413137459216, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.033288372187425926, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.018468643623756158, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.11695317153743115, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.3888506293564944, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.24887831362342314, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5200881906842448, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.24135344239900156, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.546651751367433, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.2240750868020436, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.504592024551707, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.19599054363136678, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.4875379508385634, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.2297794059243321, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.573570222814656, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.23694057839415494, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5263543431854344, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.25259068609065677, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5384891408722134, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.14728710728025965, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.38144450027069415, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.1398394229916333, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.3782539243445202, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.11823191425473403, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.4539200739128128, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.23499154120415805, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5277871993658991, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.2327860836832082, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.30346043520808336, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5927380044714227, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.27438453676673524, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5889486960419529, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.26609880387709, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5830233490543315, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.23215933493807642, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.547665794111277, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.2678221046101489, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.6016268776319008, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.11478040986605546, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.3672482256653862, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.06668493248636373, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.2624825156254661, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.26361106287028546, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5611266753056532, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.11290965451231479, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.4125077608038444, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.09225101275784285, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.37350112612982594, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.046937320085945096, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.11898263078627454, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.3841920009391055, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.13824120769041665, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.4421654273346281, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.10545905769586214, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.4263824758050386, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.09561039442149676, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.410158328815622, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.2036855110312808, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.2036855110312808, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.009367682811159305, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.07902025848436119, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.12704269942859717, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.07842372770674794, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.2577072248185967, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.2017781982444986, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.4593303345267288, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.09289951019771917, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.3765328870870861, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.1633682038712857, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.48014665724836264, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.11236616407443269, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.46893250816271476, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.11106691492771066, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.48722807515184047, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.030634941698807875, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.02648378925084961, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.13737279171076758, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.3592365999197816, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.08961889092065865, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.3874168292468809, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.08100859619233457, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.24451849449561322, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.2601769108810108, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.455492879700213, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.32169697571790584, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.4664391558122101, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.6054203380219411, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.09367325773640851, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.2641613455898965, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.3471239643495614, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.5206611437816837, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.13840054871865481, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.08099386688123823, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.16164940677303336, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.011179312292844528, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.028853119411472547, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.06268260360206095, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.16787977700016696, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.46635476894028405, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.6673933314595045, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.2650114714119404, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.5109139879921593, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.1860799414745892, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.41284800811511635, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.32766922055009523, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.5803713235003094, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.2967631506608853, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.514979965349806, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.008968578184240296, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.08102231277620507, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.015082985407569366, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.13391621912488938, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.3410315731485724, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.348007986647201, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6148736550683231, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.31222258402876674, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5549937870516303, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.2706573913259733, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5619563043714905, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.21331098311931576, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.47660259733052845, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.19850823739068116, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5129122522411931, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.19803162353826262, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.40808208228398596, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.22816849039973935, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5295534280606148, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.19496249079519765, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.333772018311225, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.11856660123276004, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.3380794185743489, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.2320305803246989, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5719371199531044, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5146400882255834, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.4917500979945503, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.3407563025626974, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6182648747061787, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.32263864160302524, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5545705365108862, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5196627001050362, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.27048170758554296, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5452157067944216, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.27748702735605824, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.536627470224698, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.12259149958656422, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.3778482967548029, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.3324437360240581, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.3472164938104332, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.23683075175361493, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2631328190836655, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.16455392433653304, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.13673885815184886, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.052821402483564636, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.10721126066665879, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.13492461680840023, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.10721126066665879, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.10845726335944729, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.10845726335944729, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.20388486867467934, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.11458590842196212, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.08003952569169961, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1621568294816267, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2655543079691671, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.1441134993558708, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.14973178994918127, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2143764616947716, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.14632805533902102, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.15249396726488093, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.014262006975939606, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.11557977235371186, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.461597801606675, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6280777654467244, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4224298950114519, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.60823085524287, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3916177035633811, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6068458202737596, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.30451258861070496, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4983778740634126, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3916177035633811, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6068458202737596, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3916177035633811, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6068458202737596, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.36033217429111203, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5707860320039717, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2826845419324588, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5045089092372385, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3040559696901293, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5057907399711822, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3916177035633811, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6068458202737596, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3916177035633811, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6068458202737596, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.28950110908559684, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5756159459598353, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3916177035633811, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6068458202737596, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3916177035633811, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6068458202737596, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.45886678012586496, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6234514801756209, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3916177035633811, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6068458202737596, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4378159930752921, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6143976526783714, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.00815517427893749, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.15465401249808575, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.42501995363729067, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.30004556274899286, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.561482333900969, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.36769040719718776, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.4064141882459388, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.34722897369611144, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.4103553163121394, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.41559381311227495, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.37440084690294706, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.4141871474340027, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.3685328560625331, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.3685328560625331, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.36586001924521905, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.3498976319211564, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.3207894212676468, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.35316499124143624, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.4170508650443324, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.44328515185259987, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.37150797394258683, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.3707104995519665, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.3260490458373974, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.006130367300589213, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.29038853710161877, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.3286711939680359, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5944310794747374, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.13547277341758465, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4830189619506113, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.22970092088416938, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5537467826528029, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.13547277341758465, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4685134392551311, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.3498215108825767, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5324168516214499, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.3359695440470467, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.3096036988813059, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5894510883198948, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.32162444235121623, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.47895604751385984, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.14485138702678432, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.39103385983908495, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.14957644445778928, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4378856092523028, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.3212752586558014, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5721988203556037, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.2219217084770335, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.437742810290776, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.621154967713859, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.28507621418290774, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5782236466585765, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.22481074167380632, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.49840634234674935, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.26751157705127454, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5494472552960327, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.27601456328057256, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5472296545526177, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.11902363807507334, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.43385588622555793, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.0066610108556241394, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.22536453058221606, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4365811373563711, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2822535302220024, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3874773378787974, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.31747697264511426, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.40797778663955364, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2822535302220024, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3818556455365969, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2822535302220024, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3785761836985817, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2822535302220024, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3818556455365969, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.28336087141473976, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.38214734777528636, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.23841754841770157, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.34481325534410395, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.10085385750978454, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.27755272407546105, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.09864534161806562, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.27758211935930016, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2822535302220024, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3818556455365969, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2807763229912453, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.38395145132718883, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2404134303620743, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.34491830800436335, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3266011589665084, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4364077249430218, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.28336087141473976, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.38098278649128375, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.24796413807329218, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3530186228211094, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2807763229912453, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.38395145132718883, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2822535302220024, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3830564706224361, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.14039809832465663, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.30143568945161137, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.07218766113019179, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.24018250025773352, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.2096419313570871, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.46935933364934335, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.773055573548356, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.43368945552925614, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.640995178057518, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3681829215408091, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6273930299436508, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4389321784429702, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.646847036932526, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.43368945552925614, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.680579963615687, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.13857910426205777, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.41268771676068494, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.43368945552925614, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6793717376740783, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.38993131397648445, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.38993131397648445, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3595137194874952, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5619162673780028, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.43368945552925614, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6377690735568077, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.10511846841633776, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3307148671232673, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.38694317759010316, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6962124663194352, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5165413230188153, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8152163780784537, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.43368945552925614, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6793717376740783, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.43383878173729606, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.721993849834018, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.35410595762404473, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5902080019243605, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.2710444515972649, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.007378883018336222, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.16440791304482247, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3160213610127146, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5165614670038283, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.30758744700466467, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4684197705189288, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.37169237058440824, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5383668331525606, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.18655267161524258, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3640275543948514, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.14200815884333318, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.37043264756085653, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.18384948243517193, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.40935288248313256, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.20050320605789015, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4046291070099031, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.25149980262171323, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.25149980262171323, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.14579837024705408, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3459916112351503, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3033939025154451, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4463607309375049, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3233187869272926, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.31638337148949686, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5386981918746817, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.31518520840312125, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5072004558983904, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.2761603007895394, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.49207696507318593, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.31518520840312125, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5072004558983904, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.13170034254236895, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3663183540703709, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3305034076847283, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.004516711833785005, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.16951909200513385, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3380125247643079, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.2961516536011624, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.49803924348035766, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.3459667618766101, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6170810606402402, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.23623790626704147, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5441149448679464, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.35936994872479583, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6492026440953677, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.2989025112582411, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6058743571913613, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.1280220256953781, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.3766998614914371, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.4161791450287817, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7054426787013603, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.20333448190047881, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.49831934768554476, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.20333448190047881, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.49831934768554476, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.3254455687469726, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.577852219465442, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.3387562718376491, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5665592722380606, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.13267656524657334, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.454151879254267, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.33667089470100775, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6074301230157959, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.2812488198404138, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.569576589771398, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.39688965270008814, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.646373332434726, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.4272870063962341, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6682855797405902, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.2934521273973611, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6135709647187009, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.1477219991186121, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.4266201924661047, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.008777992747819234, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.12288887055424895, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.4148619356639114, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.46832763312452297, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.8176110134774669, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.9436043261706615, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.9880191679951993, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.0067104198717751464, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.9025232868361638, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.9169897590736298, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.9234732618882052, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.9472986863542112, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.9709835434146469, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.9951728990866464, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.9154051169199643, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.9757471794927451, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.9154051169199643, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.9757471794927451, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.4888436840107989, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.8291158208899767, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.8935248372106969, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.9404428602061264, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.4593546097889176, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.608602146246901, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.23386786214190372, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.46662929903381617, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.16341242314728613, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.40072549318878165, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.13952118378975725, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.4030284875466178, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3419664738551679, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.4989057926975962, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.38985048513980286, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.2830789070123405, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.41858897147271634, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.20130088157694537, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.3833494604836931, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.20130088157694537, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.3833494604836931, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.1958598294695433, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.43120286814245795, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.13461801293778908, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.4435148347019696, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.3120938792834416, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.4489929043142396, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6339860404289296, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3419664738551679, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.4931724444703712, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.4844328956731527, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6798474086331312, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.24305650182597577, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.46912278832283355, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3674668904964848, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5894468068684346, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.35230796576188245, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.0064546295242688114, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.36092853787943247, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.3397180516736864, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6494995648532881, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.23811989337799513, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.4812080785035883, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.23168799483443045, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5252672120228886, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.2889285495431631, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.4536746865348185, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.4136211902049899, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6393493534857356, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.17268932789342512, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.47806194925419343, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.42067720018268145, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6309653612961436, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.1333644800640392, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.37482189565379787, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.1333644800640392, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.37482189565379787, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.24821926635843994, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.40551062972908847, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.4501073963060717, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.12049514584750938, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.4795015038022114, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.42382856047421374, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6321659688090209, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.39501632817024007, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6461322924967596, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.16310121952537132, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5625465668278802, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.4126152034907945, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6344543163574141, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.29340680870384167, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6003179769986429, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.42347099628764207, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.0018234865061998542, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.3722685688714949, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.255918614113723, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.44318862516624546, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.35285733014385007, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5945202859296662, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.34940338846112967, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.2506708132952771, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5592183664602846, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.25535827794051513, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6051855679458406, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.1528571341245854, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5481360183906401, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.22166358657237664, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.45834104234305023, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.1954411146921455, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.4036830499128384, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.1954411146921455, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.4036830499128384, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.22478920073209205, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.4500155517039222, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.22166358657237664, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.45834104234305023, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.3439068946997751, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.37219737664729546, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6020217844686402, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3552281813814547, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5990409134152898, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.35285733014385007, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5945202859296662, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.22478920073209205, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.46169108941910525, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.22478920073209205, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.46169108941910525, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.22857472921416747, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.46051531515770805, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.005304235332926387, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.3330162771465545, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.2809009542151822, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5819888906713027, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.4573889291137309, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6974989991762017, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.5409314026600619, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.8023475129738281, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.11372027710077005, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.43980817368282343, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.3615855225145535, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5942403748569466, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.3323217739558646, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.65584145837095, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.2460137257692754, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5397894338370378, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.16331948281960493, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.46281592381471465, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.16331948281960493, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.46281592381471465, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.2745762486209681, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5421002898382512, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.3925121365052661, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6435846290166946, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.16927318970546587, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.43090641888389697, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.3961867597457338, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.654720368848453, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.47467913885027985, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6998445450956154, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.4717991357336539, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7489646628366208, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.5170969057682974, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.7999241778608444, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.3813707100324891, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.6705221264670493, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.4009045958498119, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.006260653272080335, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.17193972960972626, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.50925856841751, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3487575221722675, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.15824382329465247, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.2847034639706718, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.1307655887510901, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.2688265704976335, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.010715460821011002, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.255774123517032, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3235165184655995, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.27185330211646, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.2138567563219765, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.2138567563219765, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.2690830377349408, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3016154987854724, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.247428571279836, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.22537412722674852, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3609019979890711, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.22910978833981444, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.28534353976384025, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3383777404070013, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.25916413607200117, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.4126422185632712, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.3111725630388855, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.010275038134729863, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.29676390087816046, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.27914759735007616, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.3778334876813023, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.33758329691923056, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.4156592381734106, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.27914759735007616, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.3453692248463458, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.21263609514366988, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.2685899741889062, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.19124822707796293, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.282934063502053, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.1960019004146532, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.25373579926622103, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.1954328575417037, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.30293053536483644, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.20717792614536482, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.20717792614536482, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.1946191920482554, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.30211130048302853, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.20567299330048955, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.305308701368102, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.14998180732658847, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.2781311727915272, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.27986949820390616, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.3524034144838241, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.19649146470411552, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.3024070513474541, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.27914759735007616, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.3778334876813023, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.010187697296954694, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.11767062888926026, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.2588701935884834, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.1399347491019531, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.30906837940069837, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.007792914637011044, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.15652048829355256, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.30581515496876593, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.1196671345116598, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4250373762074513, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.2805776238834007, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.37464507476331055, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.27590775387590455, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3308083900531814, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3539896048133756, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.16177172185899982, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.35656294562478846, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.11515309925749272, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.2804443217107937, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.23638531153934428, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.31895726376311057, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3821381273701999, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.010064648984723833, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.16496938783065193, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.42948798933258475, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2798876939782931, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4858922583959152, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.1196671345116598, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.42091448842491763, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.34861239437033315, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3458399031409789, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.2786084204986201, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.20810330808987174, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.2303364277054624, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.2551158426117226, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.4338792723307896, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.15426765696281117, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.36404432411255067, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.2869066874289222, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.5218678198237495, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.16618655166630525, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.4518952099931083, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.24835336815593242, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.5057077090166618, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.17729842264695017, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.3991460047555395, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.2149405271477231, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.40802426587644575, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.10925297262553597, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.24033759271012042, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.10925297262553597, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.24033759271012042, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.15337404749451009, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.3658108225254214, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.16628701046894573, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.4031359332940322, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.022674082413435877, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.3073229709775776, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.574087139172448, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.30791810312533313, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.5601926721019862, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.4511589055888862, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.2434623104231637, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.5966001227386803, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.27966356291643857, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.5478435692252245, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.10845182904139573, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.31173251865601403, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.007792914637011044, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.4091528869755915, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.28859523083492383, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5243060035864178, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4258744680390019, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.678077932919771, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3964513253420688, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5953388302269752, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.1815358071299676, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.38935610444086793, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4884391791679525, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.679307312191218, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.15980518115118317, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3462061115860011, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.35138749399652214, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5764814746526004, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.16771930140892102, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.19857943409196785, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4388967511840155, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.22743363869750483, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5072109323538316, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.16749684977011461, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.5366411241731205, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6918385856485405, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3423375720396189, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6262531466543148, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3091394458025272, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6747861905645746, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.022184632732552174, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.1294619581457805, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3243466207565264, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6005859006330247, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.13796620851017113, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3567938875935946, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.007281902334909347, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.21686905466335824, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.23626187156344583, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5259172349664247, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.14065108796517406, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4653016103437884, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4188525447915417, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.1251768639938174, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.39064868761178284, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.11847662670132668, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4766412062430795, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.15578519674093896, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4636631211068481, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.20690996611611379, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5406044734296481, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.29399556903245094, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.29399556903245094, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.11554716243103427, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4790979457669554, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.19312720381560924, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.482135799730948, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4908356678958071, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.529525057741061, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.6698621490778723, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.36896156718464784, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.6019015041179977, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4295721431866771, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.23966219681055514, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5979264101215074, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.23966219681055514, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.612884677361467, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.25830145139191246, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4896048354612968, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.006778644903960927, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.2846692049142833, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3637736731596556, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.2840158072494531, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.30509181191098333, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.2674112241085937, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.26800693547499516, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.25491743114085297, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.10556737722881891, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.14053789733354333, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.12201842178576372, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.13777657698404142, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.15032184952222774, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.10929637020896284, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.2609493813392874, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.28145874884789657, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.2998660085346492, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.29614096401881523, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.2410288519041677, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.2970891902334263, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.13122372520872347, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.12459914499732376, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.2338987151311022, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.31123452089299364, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4756674690337691, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.18506115334020554, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5096863469728294, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.26626928257104715, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.49325487167196663, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.2579969134044562, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.14919322440986654, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4210541594444331, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.14669785543513997, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3937770501596208, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.14669785543513997, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3937770501596208, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.18634868443385502, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.29586489665278076, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4674087352393852, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.15833672839657353, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4191767182334318, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.267475684709543, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2033250076872028, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5589196418579114, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.1497342212184862, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4249764678866116, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.200571466871073, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.45594166666182867, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.18506115334020554, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5096863469728294, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.18439251865842213, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5106980996855047, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.11394659250985378, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3384585010539326, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.007866479925303455, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.338607040915058, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.16510653858282376, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.14514613027046297, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.1479818938322678, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.13842309322242968, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.1025254666917686, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.10386784995254054, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.07271633793961262, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.11596632790546842, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.08880223379062127, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.08880223379062127, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.1302790809384548, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.08208180311085098, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.08880223379062127, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.19015931169221686, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.09176378853337983, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.05932404902368635, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.040354945223762775, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.1253385069411426, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.08722222222222221, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.06084090533083763, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.08316107357532411, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0744575238654659, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.18081240117184907, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.2406197931700809, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.3480566207137981, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.3002086140405867, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.4112215754871572, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.032062647754137114, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2188350647878129, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.19393255454120817, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2188350647878129, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.24181257538464912, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2727542434610149, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.24181257538464912, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2727542434610149, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.21241965502986587, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.24856075230680402, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.12181575762823904, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.23698883946199675, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.21099108137460598, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.24619894242367388, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.1561508712265784, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2966884811633862, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.16012807922240752, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2825590657535655, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.20992240303256396, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.240297594424442, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2164643623146457, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.014140612996263338, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.10958860302852312, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.19089395298343978, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.4775461600364851, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.18762960550870147, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.505905280342391, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.19843246352066848, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5254585376129028, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.11358085943578489, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.37976315415300393, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.18762960550870147, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.49348230695305667, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.26766427098586376, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.4581555129477761, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.18762960550870147, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5048762254598883, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.18626300660245443, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.4752534997380396, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.18626300660245443, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.4752534997380396, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.1847479638157385, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.49046945772456596, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.18762960550870147, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.4918089443050693, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.11960689244355235, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.4380448627488294, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.18796217698593565, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.47551817162907684, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.19843246352066848, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5123967770684649, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.19843246352066848, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.513009074208049, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.1847479638157385, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.511165178065381, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.18762960550870147, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5270918783222831, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.26938358189161943, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.46625322327819374, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.006565031463583203, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.28393900027333896, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5191104888927159, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.31087084319061986, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.48493403576578487, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2843877927900334, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5287507679912548, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.3758626428491304, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5683038108649798, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.12412599390732966, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.389443562378031, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.3375930268579116, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.4797696968747708, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.1930763338913091, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.34256550742622643, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.35540164109034167, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5028380576753965, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.19290431286720416, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.3438946440208046, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.11998609496359587, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.30206893123564843, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.19028557421845702, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.3481227049895071, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.10880654430256255, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.3608349900973628, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.1393878237505104, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.435204703197965, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5881445232832746, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.46198675363922825, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5714763979586504, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2573016641379907, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.4456135188371608, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.23185691851902213, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.432960553422533, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.1967483629471718, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.36892641041012875, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.10199952656676965, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.34660334168484125, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.00622191087653493, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.3231291402565904, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.3724134618099435, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5411724478620953, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.10991514729498916, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.35889788560857044, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.1348135718388515, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.4147071076319954, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.2941203504244363, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.1923965056241428, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.4584215932853988, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.31291633863045876, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.15300252291898875, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5221038026847953, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.2758817098791865, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.17758569532749288, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.3914970015153563, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.03744280238972193, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.23142549596796302, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.23987047142780468, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5083946276344661, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.26442043774364626, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.2534901676236048, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.4596959755675812, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.12645915844702074, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.45700241609605563, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.1955198139027402, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.4488382703464876, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.00643824307525122, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.12179842317921927, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.4354660335624341, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.32560507773821273, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.16402090918662934, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.14957644445778928, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.351130925091254, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.30229895292756775, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.32013836373742083, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.28969342447685126, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.28380664599515204, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.32374790470401554, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.3319100075365036, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.32692597326475636, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.3003972271624958, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.2947899275409462, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.29440875673203876, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.3007628551722125, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.2702894645159252, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.32010751096684203, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.32010751096684203, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.21336052387554708, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.2830838262503855, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.283868198184406, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.02875707772338124, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.20715500016834684, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.2138028589526133, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.28819072249233424, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.4281025086478473, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.1722187017293513, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.4101544500733319, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.15007225500722624, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.3392953950658033, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.19587204862073598, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.3104671471684385, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.2461253685468257, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.39904238157881977, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.2051692646110151, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.3224454187442011, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.1784284488020755, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.28915891965402907, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.10372344027345098, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.2615059403670134, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.15072969952252693, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.3087381774825622, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.13164235883295433, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.29495671936941253, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.2879469254908736, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.4591810337356331, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.20362939771829378, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.33399297886621754, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.4683082256972011, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.32995315458175184, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.4840937399079923, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.3013756361449186, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.46623994476708375, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.2639764334538811, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.4159003903315936, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.3023095132730679, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.4131900681214031, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.037800765161043304, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.22607798722887731, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.005962033319367563, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.13419909715593323, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.31160827370931377, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.31643186523025857, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4990369740896189, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0972760643475914, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.34268917796251985, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.22851741162637057, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.39052360649350154, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.11560300997546753, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.2956909760018388, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.08749655303939702, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3126423887518639, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.08822997736304256, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.24974724440250298, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.09500509002957103, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3165301597200995, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.23609820045506813, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.22367402402736386, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.31291379562636384, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.2554146380661974, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4265387920961467, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.260913547856905, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.2494006679029331, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4342326033743038, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.2494006679029331, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4342326033743038, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.34936045232359575, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4829868973953706, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.12209516941390675, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.31861071050995227, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.30097206079743416, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.31498330380638395, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.007580249288701284, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.2029445207805708, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.29160060199573634, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.41859679392459287, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.16457404825501376, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.33333438552725225, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.2673465335275293, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.44086110954461233, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.139800134566647, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.2819303243365043, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0775275441607937, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.2751344778397747, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.008444418839629163, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.11183447695967942, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.2981154499191547, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.19470810630637406, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.19470810630637406, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.22241585725803045, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.3993550498239767, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.19469181333313726, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.3702922644010077, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.09881511482966875, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.26151080011047195, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.4144465171640097, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.49508811046905704, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.3110891870529496, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.4573498364685693, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.37070419185260556, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.2756918913516653, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.39965414615166545, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.19775876971405587, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.35249250205040417, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.219417732032655, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.008423079864900701, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.16832484320875543, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.2952766411314804, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.20614290043638034, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.4026346891273123, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.10458125123016462, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.36412391917812675, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.3208902701740901, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.3438619394909334, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.22495130162065566, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.38988663574936366, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.21883928293594496, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.3942803458040245, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.279651701540242, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.41325325835097537, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.2864494973378512, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.2857216432461296, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.28755822790741375, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.16195919024831484, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.36006146480614665, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.01004039572418084, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.19086203703682342, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.40603556087396925, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.38060342997003915, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.3652515753638373, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.19001369979060856, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.40380794812919807, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.15892913346954715, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.385345061282164, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.17179393697897524, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.4061991090939492, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.010113508730872816, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.27234776738653105, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.4419177841121404, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.6137680308213187, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.25567957494892185, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.3876058827430677, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.27643937532700463, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.4260558157086469, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.35933814503113937, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5180379308134967, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.2676032275663791, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.4724604738900925, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5001337105923725, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.39284822049881124, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5731994852253092, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.32244195194244635, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.32244195194244635, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.4583677548612845, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.2864900351806976, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.4754058246795019, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.5259681099822366, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.6938905020324472, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.39615805576005414, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.625810777040839, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.39615805576005414, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.625810777040839, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.20296526946860832, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.40148740945870737, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.6136121029677094, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.29898984447859595, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.5666357652072354, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.006388648796754671, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.39360739118751503, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.3914713795199333, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.5396649879183305, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.3049871822195904, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.5208588286358844, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.28654756270464754, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.4454875631569214, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.15395769745986762, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.2834099674931056, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.3203990808307766, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.5773010307507899, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.10660282697866422, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.3328907070225636, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.32284785906314306, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.5391266648881738, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.12216223811537863, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.28630884247642424, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.11751163762525296, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.26392313732545414, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.08616885233894021, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.23536419207761525, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.07458227402640676, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.2600891962590108, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.10704047550187275, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.31257857817030105, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.5024602460502914, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.41452065364685486, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.5450885611283658, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.13532043663194473, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.46999609081501686, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.04486784453363359, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.28812803702893347, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.25834168012156994, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.48011486442896073, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.10649892433136982, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.3005075192060885, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.007250479139228118, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.2196332370857405, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.2896941452360586, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.41709543651974984, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.20254179344857384, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.2105213575043129, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.15545256448255348, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.1686298660722932, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.49742169356752797, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.2801468315735759, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.4843599290976803, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.33557244321033963, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.5209156086024106, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.14475892711544727, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.16309206463693382, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.13194265905224933, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.15365844181846375, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.3886375628227098, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.567076716470624, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.19923514659353367, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.43916186094127363, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.10871164505221177, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.34283761263483875, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.39447879527991686, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.33419222746019583, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.16395982449057947, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.1134519688019252, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.15866683249809552, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.35536785937479715, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6031612036218008, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.736286703381354, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.39432344823662835, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5943452555220106, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.34437686643287496, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6090402109312658, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.39205580893266934, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6772940233934857, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4263005628892719, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.666576816248374, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.34303589686600006, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6476809000259773, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.43103580001357805, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6690742226623104, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4583355788727905, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6740129255664499, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4583355788727905, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6740129255664499, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.478854281434795, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6734455797843703, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.478854281434795, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6734455797843703, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5108209375191675, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6791666897129686, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4330463947479356, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6198346106486459, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5730366636655709, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7197637548987978, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.2981426768485538, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5882799317365235, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4263005628892719, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6688425476017256, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4263005628892719, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6688425476017256, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4485462070116169, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6460034217597165, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4579102348988084, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6417119032346416, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6252078221435556, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7406162627381982, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.11254397891886614, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.20623288988983426, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1100081929352474, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.18967061672400035, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.10772332006118607, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.23609036869909603, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1498435848533153, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.21051700087939107, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.11483748934907699, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.22187391438286078, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.13093265020876002, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.24423594551873207, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.10772332006118607, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.17652714369664665, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.09058415367867992, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2482894089551725, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.08409693408464493, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.24281987895429524, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1864036495127383, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.28188465375440136, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.11997139690246741, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.24909548716299362, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.12326237503743924, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.25431387500879243, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.17755132725434278, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.27939552769052894, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.11552449340806617, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2255572924690555, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1445047538382198, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2737322242154943, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1957899789117337, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.32253417440653254, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.19989838721523703, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2754449184617433, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.21286049738095594, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.137248043368656, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.22329074990170197, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.14969363386531168, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.27820986095394096, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.24493390281390082, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.48113625107113883, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.19476681308252697, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.42030407727741037, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.33600502687041833, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5162346121569341, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.10336049249219333, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3088863284587533, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2176929795148492, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.45404283402200407, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.16561315331829, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4491486313807806, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.27190910124573536, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5173567851798608, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.37648087568533606, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.1673300634787508, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.43009820921524167, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.09851325694216304, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3616605984753398, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.278818795864662, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4519726808757103, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.08108479333279653, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.32446242558225763, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2985215837565239, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5357534811195178, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3606261467953634, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5657695279587265, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.34854547753540127, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5565027260893921, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.19593487880196195, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4136765523891332, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.19593487880196195, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4136765523891332, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.14455268344043232, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3511816606173986, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.08839512340686698, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.21177781620127928, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4460741740050364, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3459789902390003, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5620330456296532, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.37825713491091884, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5584414289480568, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3695375029926146, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.556875129479421, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.47923168144435746, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6534660189132082, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4200122602645879, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6060665795950726, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3118437333980883, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.49789195197291464, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.429512074830509, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6066779955199886, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.37661083833298264, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5627715650901804, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.37661083833298264, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5627715650901804, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4257605183794877, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6026940597371309, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4146193110212601, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6016308462520554, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4185497912616041, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5947798105864397, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.39653893552930464, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5591087327390277, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.35456438094510545, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5476462735264316, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.40518022025671885, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5707666164180741, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4257605183794877, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6072620760408021, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.42496721529949505, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6168976238774702, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.42963275745758267, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6275089000554018, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.24287220388451114, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.4829182994799567, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.27309322054464596, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5162255850430824, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.1845747513433909, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.44379971518505973, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.18212463619188357, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.469592540371137, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.20734616999079872, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5036833880605232, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.2817686971402115, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5170853673805775, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.2701727285191305, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5162766222516658, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.09950615774798431, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4145753205477691, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.09596136927307748, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.40849147213099996, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.37365414901389854, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5663575595102016, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.3931799632074899, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.571500205451563, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.216062485604554, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4780977009860418, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.20768860794928373, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5045131603567145, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.19482672490156053, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.373972045369592, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.22546521673609302, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5396972089286257, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.3347794278838163, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5836257841202334, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.23502778906204924, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5533644883224328, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.3288562544630599, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5875530351959068, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.23858161101294786, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5352521385268106, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.22345694274330022, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5021464201834536, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.024449792954766115, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.25748397762867226, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.49702079004924316, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2517176762753373, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.45137344500317134, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3128384316903283, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.48016279207050283, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3508847643803501, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.529198044527105, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2674628639054191, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4784292149775752, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2643715066746798, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4590454050790974, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.18555265687599404, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3943451217336116, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.22972631482860506, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.436102988762466, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.25259185051653144, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.42024044012027045, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.15765331776480065, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.1587543502252646, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4114443619817223, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.23252634497159352, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4176924459699904, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.10289033772949932, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.34792454493804187, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.35541324629951093, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.49416627591115303, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.38988999818957765, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5383555577162046, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.267457541157426, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4673846703066711, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.21808070471467408, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3966492622645894, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.21011127120840586, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3666764487561306, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.33305744607256593, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.26116607863611285, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.09196922936475649, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.35950194744727476, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.43200638115383627, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6892273787708799, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.341195158470265, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6539473951166187, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.47372467075851415, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.734800469477975, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5582838437615822, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7625459507115938, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.564140121947718, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.750543081695207, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.38962400400495395, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6039376140178496, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.45026965676007474, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6668256174353906, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4074412820541961, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6128620944765008, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4074412820541961, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6128620944765008, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.310668922100995, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.58212864821275, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3894734858195671, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.609378229268181, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.51931843676369, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7468840809971427, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.729605098531811, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8833183865941937, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3096348871649183, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5646940504012421, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5585674160229753, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.791505922278621, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.29504037076486817, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6837809127705262, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.48935454710294557, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7097085615439184, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5723452380589045, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7571087352239251, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.44680913024590146, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.648473971864945, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8247818102038394, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.40475700826319555, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4349871720911447, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.47497024539412314, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3805666011451541, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.41589484475679384, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.38511373700997104, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4710260495003035, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3719027199086929, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3530084316582828, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4677317890018283, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3869982889970619, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0977318385642452, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4258156920135329, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3787500122126683, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3752043811916754, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.354353831625583, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.10235881838919027, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.42794399630326124, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.09230372260850295, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4084071495562367, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3777505211336872, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.09649622940465846, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.29275810079464665, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5244380103905697, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6914581279144536, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.4472834999328078, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6457130269652316, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.403469748891042, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5836273992135024, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.4521209970489246, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6307076431103672, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5201565256464291, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6663170490872967, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.431319746325093, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6367129659739652, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.310186302993101, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5434540129901786, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5844803970906615, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7063190919471397, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5844803970906615, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7063190919471397, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5201565256464291, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6663170490872967, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.3234262986694916, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5509482496841094, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5471998982127312, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6620274291313689, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5471998982127312, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.668797436013741, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6201952806555309, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7357975825250463, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5950978682255068, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7209575532500453, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.36634140441362645, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5925773491774018, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.39359079123898666, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6064279839176828, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5201565256464291, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6663170490872967, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.2672991324984635, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5378982230702222, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.4880149105083363, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7177464929662396, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.504154287515855, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.6074467585243234, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.18771816026273827, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.37594160796244835, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.2828480467326008, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4330386622117487, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.26314173809974317, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.41943156806161835, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3180687407553289, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.43750976374662714, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.13950796967929138, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3461520644408903, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.17207258849758605, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3052503498954155, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.19047974638917908, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3156575555065382, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.19047974638917908, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3156575555065382, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.1377448219106278, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.35651447515721807, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.28599478270153655, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.500611405605832, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.24797752962743086, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3393109592089468, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5094767086148101, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.25578230816507097, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.45959138316050513, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3653634812607, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5239315135469935, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.30019266689543556, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.520168227007293, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3405782536352289, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4740333086460412, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.19955745890837207, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4701104981474493, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.022925118914031796, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.10793991565723801, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3418311350990793, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.33684416564135483, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6173496967095872, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.21054588509072256, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5020237474009813, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.21083122707088572, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.49504056885829906, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.22593581165006588, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.47811810874873667, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.365372700320629, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6006612018381494, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.23288432092807593, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.571224820704715, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.377949467106015, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6288808546806746, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.2784369069281814, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5290670343764194, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.2854915274011029, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5160719090963704, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3335574881036169, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6172751686457948, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.30819625273001816, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.591546565802045, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.25393951817299554, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.46388056771039937, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.39359852091634406, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6265459059804013, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.24720109346452496, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6067542031388974, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.47194552522795125, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6449793729895639, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.10436839690765871, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.4723697955467262, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.30819625273001816, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5915181594073229, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.25003907776594525, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.5740099623337644, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.10480708799994727, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.4347737895846244, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_to", + "metric": "bleu", + "score": 0.3712375815038101, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation_to", + "metric": "chrf", + "score": 0.6205370332736169, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.11283678603002038, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5027560731174364, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.29432909534200313, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5637465580755235, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.20665163138245418, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5424961081814776, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.3720123244240524, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.18586302203348085, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5550584875959773, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.1663603558532716, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5225044219427867, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.28341626687166926, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5869314876429665, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.18279722364340092, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.47825835330014377, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.18279722364340092, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.47825835330014377, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.34496242859007625, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.39992395664256997, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.3948810099417581, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.429294349316905, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6516072032987875, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.429511438817035, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.6426984035529237, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.19771661626342427, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5115994004182517, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.4457322258249424, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.4346320450539645, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.4980697864199004, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.008086388726125911, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_to", + "metric": "bleu", + "score": 0.17781916046116683, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation_to", + "metric": "chrf", + "score": 0.5216356191979474, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.30894994002746395, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6495798576994254, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.35806497640912766, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5522671396375264, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3391824705480895, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5823536571792293, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3369582032493922, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.604856884215657, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.21648792313483747, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5842434739000809, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.24078435074822624, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5511929774495076, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.23399485663908418, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5631067041333725, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.1900264383673692, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5613137257427737, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.1900264383673692, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5613137257427737, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.15084681924900642, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5314568462829651, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.26981967589819256, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5640017835905705, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.11911066129095806, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.441652518518041, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.31818246779032233, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6229693133713873, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.35190530004480436, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.65019529743538, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.28809973780460224, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5952046679740143, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3315037521841549, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.588148042382191, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.17403647602332778, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.4845935914407903, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.3337338878596308, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.6611983572472826, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.01890321292509088, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_to", + "metric": "bleu", + "score": 0.1721831215207535, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation_to", + "metric": "chrf", + "score": 0.5841092375226741, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.25325897139464854, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.511461689033225, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.3730973285213212, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5926422939507472, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.21478093144117116, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5322186790358018, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.27204984763557305, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5456825634559386, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.2667533719706517, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5290480371370871, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.18081719101162075, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5207488300398541, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.1793410088328766, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5126133936832279, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.3585760940563117, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.599983086978317, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.3585760940563117, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.599983086978317, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.24328420398524073, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.4502822427440237, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.12819160938600316, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.3968207935195225, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.2846273713972197, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5068034604649062, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.4008628432878983, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5958332944294813, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.3938117685205575, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5683065750219333, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.41505282193631027, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.5557512735595823, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.3369338116697911, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.593905704810687, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.3369338116697911, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.593905704810687, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.1852365018195853, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.450559932956108, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.07367272607925157, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.37211692202201907, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_to", + "metric": "bleu", + "score": 0.2571225648472028, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation_to", + "metric": "chrf", + "score": 0.48544337623818506, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.9199349282509897, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.9199349282509897, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.8827916928185874, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.9278293769424701, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.5919743410620021, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.8142101616656354, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.2762822897608569, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.24942094354139677, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.7065161304293469, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.8047179188276988, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.23537164857894743, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.689799263500028, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.6642718379939968, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.7768492311706325, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_to", + "metric": "bleu", + "score": 0.7660237942267061, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation_to", + "metric": "chrf", + "score": 0.8523393041110139, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.162496560019558, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.47693678197649336, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.21126480857843466, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.4132516611502927, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.22654272969286518, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.4258225502326524, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.38876834795530796, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.5513827117332133, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.16042342632389064, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.46873038602962747, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.19482023423146808, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.46755793143523355, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.19355919558818033, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.3931759629568022, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.09417456496733598, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.09417456496733598, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.07094072500797352, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.3513533312402496, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.23104354771619826, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.40385922879070485, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.3633072011801384, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.3592209878136327, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.5721408824958313, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.3272043176965996, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.49540621006596375, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.3322425567593946, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.505302238370584, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.24002944603900755, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.42818698139212963, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.14662361810887722, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.3944286703083527, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.26134221086624515, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.25501149596911343, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation_to", + "metric": "bleu", + "score": 0.18038473571185787, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation_to", + "metric": "chrf", + "score": 0.3719715144126504, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2044743996680912, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5296253104099969, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.09990095999535835, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.43541588074965143, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.1401169793122292, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4782044537286589, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.08807590307622687, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3086025862359103, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3472562426730641, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.11696299829645894, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.41815951692436326, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.16743424359150172, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.43311618815403874, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3473759201958039, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3434479962854036, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3943889049582298, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.13754878494693165, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4400994484887776, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.2623022555609652, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.09666926200219352, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.43859358701110374, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.10655498022151058, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.43007147285347036, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.08570827906588853, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.444312684802016, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.11265744222717027, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.39481530246324914, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.1365091799434783, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.39718585567374165, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.34302997554484677, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.30152990419466674, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3404467436981749, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.1689554748507331, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.48752020586891187, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.09863022371664866, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.4410680148316049, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.1059024181148723, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.41871695290011, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.11117895489854909, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.4609024218314948, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.11871704290088221, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.4586403617409107, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.11481934989482791, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.47478753279668967, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.2089685256289425, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.5333424925429209, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.46777164721381115, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.15744827002744996, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.5597535178949913, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.3906806785529241, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.37503463106800017, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.26125230144349915, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.2789924975655291, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.5437335609503513, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.2764393753270045, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.521430018161165, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.21521325160060412, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.549817688316955, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.18948552254902223, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.488827004785696, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.17592391261425677, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.46351362131094864, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0878497559398328, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.4566528137749558, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.009020025336134992, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation_to", + "metric": "chrf", + "score": 0.4228408462744233, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.16157821959747307, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5255622543684244, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.15748430756119847, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.530020236187551, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.15089318423122547, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.47428317866010966, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.14391777351450838, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.452101140887801, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.14888606314762867, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.49729350751634693, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.14326513489612383, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4797608004637253, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.1296922311601412, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.44753695206116967, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.25815489731794905, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4911644025320733, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.149290338098292, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.47670219817575576, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.46746502686613134, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.10362222834266982, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2553804995442199, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5077415230970365, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.16531071130846114, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4890742061712915, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.1768738680104602, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5306318459311525, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.14326513489612383, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4846286435143049, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.14326513489612383, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4878418865274075, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.42723061275235275, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4315166819282004, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation_to", + "metric": "bleu", + "score": 0.1428943539626028, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3929455911139461, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.14473479197868241, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4413634590503217, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.1842259592735289, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4512010919409926, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.2686541301611512, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.48130750228636077, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.14912638561111452, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.39836399000486794, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.20742086615316124, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.49878576838033467, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.12154039608017253, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.41841928964590686, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.15415302247076879, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4123343961300446, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.07756347368634367, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.41174180414190475, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.05358172355226594, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.32818360721376694, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.05900686222697278, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.30171628687493407, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.10650846311095664, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.34263663427911273, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.051646114372149526, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.3047911045736194, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.1299080632357416, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.42087503139452254, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.16733849005024246, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4752813131701307, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.2800051945520251, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.5002082179686791, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.06583464674277996, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4505447671196048, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.17461391742177654, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.42828976335277197, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.09665694287589056, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4197271807950379, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.11578004334848788, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.4640603998690919, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation_to", + "metric": "bleu", + "score": 0.06312139294970891, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation_to", + "metric": "chrf", + "score": 0.33724247478267805, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4566114002517467, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.21245026220526622, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5450614695192502, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.23534125777852055, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5178557103050311, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.17312100511353637, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5402176694286381, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.46657292439622594, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.20587673103743992, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4757245820906668, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.18087140599571747, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4377272235852682, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.25254176612339824, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.29650451982117443, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.46978147835505735, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.17882068596625036, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5408595600390174, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.46096224066661245, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.20587673103743992, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5260086668248203, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.33325351494406774, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5948182731714917, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.28478979354294687, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.6101591430933082, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4978503856882466, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5116515364507819, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.14170261452878116, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.43601861264243175, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.009816976525892437, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation_to", + "metric": "chrf", + "score": 0.39847493267603823, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5049599536473849, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.464305874546181, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5141921869849101, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.19363268497489844, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5704206116375886, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5154606393377016, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4757656003147759, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5469867443567507, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.22000458484755875, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6138504491885896, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.22000458484755875, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6138504491885896, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.40836418341872194, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.13237645860785527, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.44855614870199234, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.2399509189374762, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.23005567239800093, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5929496962654774, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.21980503399839907, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5614733290167098, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.1820057458455135, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5620165305042889, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.17377208785560805, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5789527486073971, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.1761368775330164, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6050330397163678, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5589534921007193, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.38966035735880794, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3272263309043194, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation_to", + "metric": "chrf", + "score": 0.62085012281251, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.24468026894076475, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.2749771763892432, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.24399387658656807, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.3044305895074006, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.2005610750993972, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.2780404941957844, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.2694630167370006, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.29184952402062775, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.1651529607540041, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.1596886882102109, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.18180630412228768, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.18034760660633942, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.20587735759782932, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.18120506204127576, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.18120506204127576, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.14637016199691527, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.20329087523399839, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.18842975899221734, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.17075840357973465, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.1740044679403827, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.21605512928343784, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.09165698799845141, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.14434599942615042, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.1524760851870573, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.20099656832951143, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.16757404945398316, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.08361504479780595, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation_to", + "metric": "bleu", + "score": 0.14745425511282265, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation_to", + "metric": "chrf", + "score": 0.2554284370352702, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.24977763651196191, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.21237639792675794, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.20719152387981773, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.017009689511280856, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.16261583096444793, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2819800492878427, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.1925903910576669, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.3671206201699281, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.3147902135663803, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2726119137509564, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.3161459451670598, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.24244700373036193, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2655680598205705, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2291992284121387, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.21972046355478686, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.31901376980971496, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.31793981684013795, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.2163965995950386, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.3162880684107879, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.19276213214427151, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.35422146249879416, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.3738879430968546, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2901450731950559, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.2702695112247082, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation_to", + "metric": "chrf", + "score": 0.22144725090715137, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.28494569863631247, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5314415165747192, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.23269544971552114, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.49749977919083926, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.30764071640662, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.6086887537276772, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.27197512669320856, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5796304580865379, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.2560035504808655, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5191300553291102, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.2755198905868496, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.53562544881003, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.2693653652960018, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5351323891749961, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.28048057329781156, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5173444818125783, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.28048057329781156, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5173444818125783, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.16351916864221824, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.48048823975265614, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.2652698379999432, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5353598122519497, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.26651453693882154, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5171512452165166, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.18374033535449952, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.49913992421569536, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.2435306776669555, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5166317384168408, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.2294197978519848, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.5102280023573198, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.453447647053287, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.4578627549446598, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.19991159172412357, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.511242030236653, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.4166251963329014, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation_to", + "metric": "bleu", + "score": 0.1741974301903178, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation_to", + "metric": "chrf", + "score": 0.4671523918556039, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.3780253819893537, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6691463628105327, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2764824153808333, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.669057962133461, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.3202552754791998, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6434556886263748, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.25603152270914875, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5814754099825138, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2897057417455179, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.7234741868658244, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.25526757194809524, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6364430511716954, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2503202840132539, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6628587960881935, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.24053590568750352, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6655821853703773, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2778012827037069, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6711394082886347, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.21629919102620548, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6724473560651265, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.09485180281084839, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.529779760886638, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.19994245408068972, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.46449853381454226, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.7639212953985626, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.4646770361393315, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.7875365172767899, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.19875798613873752, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6616311958065841, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2454606700587796, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6558538403486112, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.2446000556262557, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.7189156627247651, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.6314478474842457, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.39353544541599106, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation_to", + "metric": "bleu", + "score": 0.10301175303708132, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation_to", + "metric": "chrf", + "score": 0.5376516921096433, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.2991476612258654, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.3082702687589522, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.31985901752714363, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.3093019888260462, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.38122846504266117, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.28289355963456897, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5240089168343677, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.15197630365858814, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.3903562094322204, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.17096461637332724, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.3692462954812726, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.3058932494795886, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.45803166077513013, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.2215392629217546, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.41257102906156734, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.332926296884715, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.23697705757746226, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.2718193327400847, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.4809886758134228, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.236183472854973, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.4962696728269181, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.3397501129479015, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5046267909194274, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.3039889130898378, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.5189316341976646, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.2737404369432603, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.44170111004367846, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.09276734202644997, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.34738831385047086, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.038929274966113724, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation_to", + "metric": "chrf", + "score": 0.3715413713289065, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.125959010609916, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.4578429795465262, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.3002210625771089, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.10836164033218013, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.2822470723835753, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.30837343649807475, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.16285656455323885, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.38593361366316536, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.08092905161582194, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.40325551743862786, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.17677215260187162, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.42492794367017145, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.31671366337624424, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.25561861397357827, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.3066033339868866, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.13670948742268121, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.4671925185138795, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.26302325128893417, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.14677278920264347, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.43976708356683103, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.155064927647868, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.4475601468947218, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.11572453857976596, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.40350265317086687, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.08609583710384033, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.393787114023589, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.15179857311923614, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.4250156741030636, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.3082198822374831, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.09663983962924942, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation_to", + "metric": "chrf", + "score": 0.28623327813191574, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.2686036309072948, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.46796278650100787, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.32114886466116627, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.4290130719589314, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.31745588252583107, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.47247928950031065, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.19602407208403763, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.3632643493569709, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.31282819180907734, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.4675168560541054, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.2830521145956212, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.4573087432916607, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.22312700803550112, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.41892404547996925, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.20687905897572376, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.362340397723073, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.20687905897572376, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.362340397723073, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.22579912926222612, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.3783017116228253, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.32978577921952357, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.42924039608828407, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.14088570267002404, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.3918583817524295, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.3766280281182921, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.561875298764404, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.30926866193743463, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.4734674396049645, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.32379826522537186, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.46258247655884976, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.009198656648134795, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.25957913590023596, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.38356760326816786, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.2430631876672867, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.43715989625290264, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.0180364018094266, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation_to", + "metric": "bleu", + "score": 0.17628695321990814, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation_to", + "metric": "chrf", + "score": 0.35674625496754503, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3021887988636079, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.45419878739503283, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.23552484042758592, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4709821983218137, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.27344034184005456, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4592757626540918, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.24521459631436066, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.39838009260057416, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.38474707938945013, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5248793758647762, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.23416315942114563, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4208857451138015, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.38791398909746805, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5307069804730096, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.11525474107629258, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3599175708579159, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.31490883229126904, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.19107718673008198, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4125999494584315, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.34524014843941836, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4895068008032555, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.30994419975665016, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4742646380437834, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.5050428988316858, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6493635959645938, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4052322980796165, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.49611847082895705, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3328216606045846, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4941913602504158, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.053879020811737684, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.38021491541756264, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.31392132264048933, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5754896830224385, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.12453127610933407, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.29863174206979526, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.007211634308109934, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3243463643565424, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5301315752666177, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.26012602101891624, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5371799694530636, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.14137375964454066, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.4881276325562942, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.2524400150751196, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.543218311413834, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.1680217411909958, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.476749806940524, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.21986501688175145, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5475496671985627, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.18643403650822063, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.47409410383361306, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.15942530661337126, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5151997420421937, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.1551207146087495, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.45222225168463764, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.07562263205281951, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.40405285180350065, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.11247951467712404, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.49181415463176537, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.26037258624452736, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5454235944725728, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.07343660663917347, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.3787234084201135, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.2921802715934955, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5926371897941235, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.11125382292406938, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5333694859964047, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.16669278390769504, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.49159200223893573, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.41163971744852484, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.6347999526262019, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.3447572227473206, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.5387838115437739, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.4062756549837031, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.04596396358288162, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation_to", + "metric": "bleu", + "score": 0.17188988271346264, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation_to", + "metric": "chrf", + "score": 0.49227421818586253, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.05156674665890638, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.36698218242626135, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.3430290995002961, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.21039621982729317, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.3552217341881303, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.29842536574876644, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.3490672284963974, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.3280456612129034, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.24230069025465048, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.26455714412627024, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.13029936621144683, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.03935386961535685, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.2024319322301778, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.11865659492276644, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.43729335450310336, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.07043397949482622, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.3852802461214156, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.3227864720021685, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.14627961137626996, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.44627375475551206, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.15387660309188697, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.4518012668858871, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.32862413291299897, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.008721090647871285, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation_to", + "metric": "chrf", + "score": 0.2647246444953521, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.09952498083578393, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.3978595245805609, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.10916808066739564, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.43671495255219495, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.10681900656181822, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.4311441166019649, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.3941055692915637, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.09015951516022348, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.41181252304348637, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.1191908679309468, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.45187698687012057, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.10615684540251687, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.460046617317305, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.23054527938920757, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.49607909097851804, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.23054527938920757, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.4915475454360348, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.13869428765868777, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.46453154080273173, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.1001225426710981, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.40793765705272267, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.3829530312593843, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.228441262407929, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.49042651481296606, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.09051954027868377, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.3959277285416738, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.139731584961703, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.42457915173356314, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.0969377041969594, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.4325087280742601, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.13675976358974223, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.4986242378041087, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.37656227734611086, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation_to", + "metric": "bleu", + "score": 0.12588329541191326, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation_to", + "metric": "chrf", + "score": 0.41955518796523705, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.38459672847260074, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.41318454544592675, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.3093723160200868, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.3100065568321291, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.47847792583530074, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.07243671671799473, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.39798076169845714, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.413922696186207, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.24716364461082166, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.23820815591418562, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.07498532429953313, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.10846480297768277, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.13086094852026312, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.10533861301133443, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.48564427576794694, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.1670515635012818, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.5251495423402189, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.40063869758193577, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.4223466541571402, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.43551167119135525, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.2548887818537811, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.022883128970738017, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation_to", + "metric": "chrf", + "score": 0.339150270719077, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.14073835588074438, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.3186851468537677, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.11092528636668526, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.31604383600836816, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.3032188091357597, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.07647695577791483, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.2986901902965963, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.2061578366200879, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.37101078385857167, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.11823053204772466, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.33614010685052986, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.13839209880933745, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.35315147740153213, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.10306540752539145, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.3271343056196344, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.10306540752539145, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.3271343056196344, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.079343898106884, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.11055607910487246, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.3203685139724149, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.13862954544342151, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.10447015481416681, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.3834682647143798, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.1570432711803692, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.3293030041429665, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.24985540866207695, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.43357023350146434, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.33987749185852145, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.09763088860404724, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.3488516919947926, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.2863678077311179, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.17460041508427898, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation_to", + "metric": "chrf", + "score": 0.28744629692241225, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "translation_to", - "metric": "chrf", - "score": 0.4457322258249424, + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "translation_to", - "metric": "bleu", - "score": 0.0, + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "translation_to", - "metric": "chrf", - "score": 0.008086388726125911, + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "translation_to", - "metric": "bleu", - "score": 0.17781916046116683, + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "translation_to", - "metric": "chrf", - "score": 0.5216356191979474, + "model": "deepseek/deepseek-chat", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "translation_to", - "metric": "bleu", - "score": 0.30894994002746395, + "model": "microsoft/phi-4", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "translation_to", - "metric": "chrf", - "score": 0.6495798576994254, + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation_to", - "metric": "bleu", - "score": 0.35806497640912766, + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation_to", - "metric": "chrf", - "score": 0.5522671396375264, + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "translation_to", - "metric": "bleu", - "score": 0.3391824705480895, + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "translation_to", - "metric": "chrf", - "score": 0.5823536571792293, + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "translation_to", - "metric": "bleu", - "score": 0.3369582032493922, + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "translation_to", - "metric": "chrf", - "score": 0.604856884215657, + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "id", - "task": "translation_to", - "metric": "bleu", - "score": 0.24078435074822624, + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "id", - "task": "translation_to", - "metric": "chrf", - "score": 0.5511929774495076, + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "translation_to", - "metric": "bleu", - "score": 0.23399485663908418, + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "translation_to", - "metric": "chrf", - "score": 0.5631067041333725, + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "translation_to", - "metric": "bleu", - "score": 0.15084681924900642, + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "translation_to", - "metric": "chrf", - "score": 0.5314568462829651, + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", - "task": "translation_to", - "metric": "bleu", - "score": 0.31818246779032233, + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", - "task": "translation_to", - "metric": "chrf", - "score": 0.6229693133713873, + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "translation_to", - "metric": "bleu", - "score": 0.28809973780460224, + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "translation_to", - "metric": "chrf", - "score": 0.5952046679740143, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "translation_to", - "metric": "bleu", - "score": 0.01955520210672138, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "translation_to", - "metric": "chrf", - "score": 0.21292283406490206, + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "translation_to", - "metric": "bleu", - "score": 0.3315037521841549, + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "translation_to", - "metric": "chrf", - "score": 0.588148042382191, + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "translation_to", - "metric": "bleu", - "score": 0.0, + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "translation_to", - "metric": "chrf", - "score": 0.01890321292509088, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "translation_to", - "metric": "bleu", - "score": 0.1721831215207535, + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "translation_to", - "metric": "chrf", - "score": 0.5841092375226741, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation_to", - "metric": "bleu", - "score": 0.25325897139464854, + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "translation_to", - "metric": "chrf", - "score": 0.511461689033225, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation_to", - "metric": "bleu", - "score": 0.3730973285213212, + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation_to", - "metric": "chrf", - "score": 0.5926422939507472, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation_to", - "metric": "bleu", - "score": 0.21478093144117116, + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "translation_to", - "metric": "chrf", - "score": 0.5322186790358018, + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "translation_to", - "metric": "bleu", - "score": 0.27204984763557305, + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "translation_to", - "metric": "chrf", - "score": 0.5456825634559386, + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "de", - "task": "translation_to", - "metric": "bleu", - "score": 0.18081719101162075, + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "de", - "task": "translation_to", - "metric": "chrf", - "score": 0.5207488300398541, + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "translation_to", - "metric": "bleu", - "score": 0.1793410088328766, + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "translation_to", - "metric": "chrf", - "score": 0.5126133936832279, + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "translation_to", - "metric": "bleu", - "score": 0.24328420398524073, + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "translation_to", - "metric": "chrf", - "score": 0.4502822427440237, + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", - "task": "translation_to", - "metric": "bleu", - "score": 0.4008628432878983, + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", - "task": "translation_to", - "metric": "chrf", - "score": 0.5958332944294813, + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "translation_to", - "metric": "bleu", - "score": 0.41505282193631027, + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "translation_to", - "metric": "chrf", - "score": 0.5557512735595823, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "translation_to", - "metric": "bleu", - "score": 0.20911971038029412, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "translation_to", - "metric": "chrf", - "score": 0.5234684603685517, + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "translation_to", - "metric": "bleu", - "score": 0.3369338116697911, + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "translation_to", - "metric": "chrf", - "score": 0.593905704810687, + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "translation_to", - "metric": "bleu", - "score": 0.07367272607925157, + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "translation_to", - "metric": "chrf", - "score": 0.37211692202201907, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "translation_to", - "metric": "bleu", - "score": 0.2571225648472028, + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "translation_to", - "metric": "chrf", - "score": 0.48544337623818506, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "translation_to", - "metric": "bleu", - "score": 1.0, + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "translation_to", - "metric": "chrf", - "score": 0.9199349282509897, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", - "metric": "bleu", - "score": 1.0, + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", - "metric": "chrf", - "score": 0.9199349282509897, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", - "metric": "bleu", - "score": 0.8827916928185874, + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", - "metric": "chrf", - "score": 0.9278293769424701, + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", - "metric": "bleu", - "score": 0.5919743410620021, + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "translation_to", - "metric": "chrf", - "score": 0.8142101616656354, + "model": "openai/gpt-4.1-mini", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", - "task": "translation_to", - "metric": "bleu", - "score": 0.0, + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", - "task": "translation_to", - "metric": "chrf", - "score": 0.2762822897608569, + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "translation_to", - "metric": "bleu", - "score": 0.0, + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "translation_to", - "metric": "chrf", - "score": 0.24942094354139677, + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "translation_to", - "metric": "bleu", - "score": 1.0, + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "translation_to", - "metric": "chrf", - "score": 1.0, + "model": "mistralai/mistral-saba", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", - "task": "translation_to", - "metric": "bleu", - "score": 1.0, + "model": "mistralai/mistral-nemo", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, "sentence_nr": 9 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", - "task": "translation_to", - "metric": "chrf", - "score": 1.0, + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "translation_to", - "metric": "bleu", - "score": 0.6642718379939968, + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "translation_to", - "metric": "chrf", - "score": 0.7768492311706325, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "translation_to", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "translation_to", - "metric": "chrf", - "score": 0.0, + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "translation_to", - "metric": "bleu", - "score": 1.0, + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "translation_to", - "metric": "chrf", - "score": 1.0, + "model": "deepseek/deepseek-chat", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "translation_to", - "metric": "bleu", - "score": 1.0, + "model": "microsoft/phi-4", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "translation_to", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "translation_to", - "metric": "bleu", - "score": 0.7660237942267061, + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "translation_to", - "metric": "chrf", - "score": 0.8523393041110139, + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 0 @@ -70694,15 +288371,15 @@ { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -70710,7 +288387,15 @@ { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -70718,15 +288403,31 @@ { "model": "openai/gpt-4.1-nano", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -70734,39 +288435,71 @@ { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "mistralai/mistral-saba", "bcp_47": "en", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -70774,31 +288507,31 @@ { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -70806,15 +288539,23 @@ { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -70822,23 +288563,55 @@ { "model": "openai/gpt-4.1-nano", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 0 @@ -70846,31 +288619,47 @@ { "model": "google/gemini-2.5-flash-preview", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -70878,23 +288667,23 @@ { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 0 @@ -70902,15 +288691,15 @@ { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -70918,7 +288707,15 @@ { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -70926,47 +288723,79 @@ { "model": "openai/gpt-4.1-nano", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 0 @@ -70974,7 +288803,23 @@ { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -70982,103 +288827,159 @@ { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-3.5-turbo", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "mistralai/mistral-saba", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", + "model": "mistralai/mistral-nemo", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "google/gemma-3-27b-it", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "microsoft/phi-4", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -71086,39 +288987,39 @@ { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -71126,247 +289027,247 @@ { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-saba", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "mistralai/mistral-nemo", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", - "task": "classification", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", + "model": "openai/gpt-4o-mini", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "openai/gpt-3.5-turbo", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", + "model": "mistralai/mistral-saba", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", + "model": "mistralai/mistral-nemo", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 0 @@ -71374,31 +289275,39 @@ { "model": "google/gemma-3-27b-it", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 0 @@ -71406,7 +289315,7 @@ { "model": "amazon/nova-micro-v1", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 0 @@ -71414,7 +289323,7 @@ { "model": "meta-llama/llama-4-maverick", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 0 @@ -71422,15 +289331,15 @@ { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -71438,31 +289347,71 @@ { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, "sentence_nr": 0 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 0 @@ -71470,31 +289419,47 @@ { "model": "google/gemini-2.5-flash-preview", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -71502,39 +289467,39 @@ { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -71542,247 +289507,247 @@ { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-saba", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "mistralai/mistral-nemo", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", - "task": "classification", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", + "model": "openai/gpt-4o-mini", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "openai/gpt-3.5-turbo", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", + "model": "mistralai/mistral-saba", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", + "model": "mistralai/mistral-nemo", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -71790,23 +289755,31 @@ { "model": "google/gemma-3-27b-it", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -71814,23 +289787,23 @@ { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -71838,7 +289811,7 @@ { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -71846,7 +289819,7 @@ { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -71854,7 +289827,15 @@ { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -71862,15 +289843,31 @@ { "model": "openai/gpt-4.1-nano", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -71878,7 +289875,23 @@ { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 0 @@ -71886,23 +289899,23 @@ { "model": "google/gemini-2.5-flash-preview", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 0 @@ -71910,7 +289923,23 @@ { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -71918,31 +289947,31 @@ { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -71950,7 +289979,7 @@ { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -71958,7 +289987,15 @@ { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "id", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -71966,15 +290003,31 @@ { "model": "openai/gpt-4.1-nano", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "id", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -71982,7 +290035,23 @@ { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "id", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 0 @@ -71990,23 +290059,23 @@ { "model": "google/gemini-2.5-flash-preview", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 0 @@ -72014,39 +290083,55 @@ { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -72054,7 +290139,7 @@ { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -72062,7 +290147,15 @@ { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "de", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -72070,15 +290163,31 @@ { "model": "openai/gpt-4.1-nano", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "de", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -72086,39 +290195,71 @@ { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 1, "sentence_nr": 0 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 1, "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", "bcp_47": "de", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -72126,31 +290267,31 @@ { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ja", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -72158,7 +290299,7 @@ { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ja", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -72166,7 +290307,15 @@ { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ja", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -72174,55 +290323,103 @@ { "model": "openai/gpt-4.1-nano", "bcp_47": "ja", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ja", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", + "score": 1, "sentence_nr": 0 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ja", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ja", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ja", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ja", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ja", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 0 @@ -72230,14066 +290427,15546 @@ { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ja", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ja", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "classification", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "en", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "classification", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "classification", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "classification", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "classification", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "es", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", - "task": "classification", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "tr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "classification", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", - "task": "classification", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "classification", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "classification", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "classification", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "classification", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "classification", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "classification", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "id", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "de", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", - "task": "classification", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "en", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "google/gemma-3-27b-it", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", + "model": "deepseek/deepseek-chat", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", + "model": "microsoft/phi-4", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "amazon/nova-micro-v1", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", + "model": "openai/gpt-4.1-nano", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "openai/gpt-3.5-turbo", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", + "model": "mistralai/mistral-saba", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "mistralai/mistral-nemo", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 1 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 1 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "deepseek/deepseek-chat", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-saba", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-nemo", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", + "model": "openai/gpt-3.5-turbo", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "mistralai/mistral-saba", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "mistralai/mistral-nemo", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", + "model": "google/gemma-3-27b-it", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "deepseek/deepseek-chat", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "microsoft/phi-4", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "amazon/nova-micro-v1", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", + "model": "openai/gpt-4.1-nano", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "openai/gpt-3.5-turbo", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", + "model": "mistralai/mistral-saba", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "mistralai/mistral-nemo", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 1 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-saba", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "mistralai/mistral-nemo", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", - "task": "classification", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", + "model": "openai/gpt-4o-mini", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "openai/gpt-3.5-turbo", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", + "model": "mistralai/mistral-saba", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", + "model": "mistralai/mistral-nemo", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo", "bcp_47": "de", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "mistralai/mistral-saba", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-nemo", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemma-3-27b-it", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", + "model": "meta-llama/llama-4-maverick", "bcp_47": "ja", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ja", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ja", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "openai/gpt-4.1-mini", "bcp_47": "ja", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "openai/gpt-4.1-nano", "bcp_47": "ja", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "ja", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "ja", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "en", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "classification", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "es", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 1 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", - "task": "classification", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "classification", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "classification", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "classification", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", - "task": "classification", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "ko", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", - "task": "classification", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "classification", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "classification", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "classification", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", - "task": "classification", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "classification", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "classification", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "id", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "classification", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "classification", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "classification", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "classification", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "classification", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "classification", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "de", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 1 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "classification", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", + "model": "openai/gpt-4.1-mini", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-4.1-nano", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-3.5-turbo", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "mistralai/mistral-saba", "bcp_47": "en", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "es", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", + "model": "google/gemma-3-27b-it", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "deepseek/deepseek-chat", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "microsoft/phi-4", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", + "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", + "model": "amazon/nova-micro-v1", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "classification", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-3.5-turbo", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "mistralai/mistral-saba", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", + "model": "mistralai/mistral-nemo", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "google/gemma-3-27b-it", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "microsoft/phi-4", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-saba", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "mistralai/mistral-nemo", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "classification", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", + "model": "openai/gpt-4.1-nano", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "openai/gpt-3.5-turbo", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", + "model": "mistralai/mistral-saba", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "mistralai/mistral-nemo", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-saba", "bcp_47": "id", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "id", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", + "model": "meta-llama/llama-4-maverick", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-4.1-mini", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", + "model": "openai/gpt-4.1-nano", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "openai/gpt-4o-mini", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", + "model": "openai/gpt-3.5-turbo", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4.1-nano", - "bcp_47": "en", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "es", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "classification", + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", - "task": "classification", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "classification", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "classification", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "classification", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "classification", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", - "task": "classification", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "classification", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "classification", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", - "task": "classification", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "classification", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "classification", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "id", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "classification", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 2 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "de", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", - "task": "classification", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "classification", + "bcp_47": "pl", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "classification", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "classification", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "classification", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "classification", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", - "task": "classification", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "classification", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "classification", + "bcp_47": "ha", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", - "task": "classification", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 2 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "classification", + "bcp_47": "ha", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 2 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", "bcp_47": "en", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "mistralai/mistral-saba", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "mistralai/mistral-nemo", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "deepseek/deepseek-chat", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "google/gemma-3-27b-it", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "deepseek/deepseek-chat", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", + "model": "microsoft/phi-4", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", + "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "amazon/nova-micro-v1", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", - "task": "classification", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-3.5-turbo", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "mistralai/mistral-saba", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", + "model": "mistralai/mistral-nemo", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "google/gemma-3-27b-it", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "microsoft/phi-4", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-saba", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-nemo", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-3.5-turbo", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "mistralai/mistral-saba", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", + "model": "mistralai/mistral-nemo", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "google/gemma-3-27b-it", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "microsoft/phi-4", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "mistralai/mistral-saba", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "mistralai/mistral-nemo", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "id", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-saba", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-nemo", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "ja", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", + "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ja", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ja", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "openai/gpt-4.1-mini", "bcp_47": "ja", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "openai/gpt-4.1-nano", "bcp_47": "ja", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", + "model": "openai/gpt-4o-mini", "bcp_47": "ja", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "en", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "classification", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "classification", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "classification", + "bcp_47": "vi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "es", - "task": "classification", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "classification", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "classification", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "classification", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", - "task": "classification", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "classification", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "classification", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "classification", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "classification", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "classification", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "classification", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "id", - "task": "classification", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "classification", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "de", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "classification", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 3 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "classification", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "classification", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "es", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "microsoft/phi-4", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-saba", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "mistralai/mistral-nemo", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-3.5-turbo", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "mistralai/mistral-saba", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", + "model": "mistralai/mistral-nemo", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "google/gemma-3-27b-it", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "microsoft/phi-4", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-saba", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-3.5-turbo", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", + "model": "mistralai/mistral-saba", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-nemo", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemma-3-27b-it", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "classification", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-3.5-turbo", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "mistralai/mistral-saba", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", + "model": "mistralai/mistral-nemo", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "google/gemma-3-27b-it", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "microsoft/phi-4", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ru", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "sw", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-saba", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-nemo", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "id", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "de", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", + "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "openai/gpt-4.1-mini", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "openai/gpt-4.1-nano", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", + "model": "openai/gpt-4o-mini", "bcp_47": "de", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "classification", + "bcp_47": "de", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4.1-nano", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "ja", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "es", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", - "task": "classification", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "vi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ur", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ur", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ur", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ur", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ur", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", - "task": "classification", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "tr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "ko", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "ko", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "ko", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "ko", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pa", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pa", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pa", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pa", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pa", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", - "task": "classification", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", - "task": "classification", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "sw", - "task": "classification", + "bcp_47": "fil", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", - "task": "classification", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", - "task": "classification", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4.1-nano", - "bcp_47": "id", - "task": "classification", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "de", - "task": "classification", + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", - "task": "classification", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 4 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", - "task": "classification", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 4 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", - "task": "classification", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", - "task": "classification", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 4 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", - "task": "classification", + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", - "task": "classification", + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", - "task": "classification", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 4 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", - "task": "classification", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ja", - "task": "classification", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 4 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 4 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", - "task": "classification", + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", - "task": "classification", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ja", - "task": "classification", + "bcp_47": "ha", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -86297,7 +305974,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -86305,7 +305982,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -86313,15 +305990,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", @@ -86329,7 +306014,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -86337,7 +306038,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", @@ -86345,31 +306062,47 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -86377,7 +306110,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", @@ -86385,7 +306118,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", @@ -86393,15 +306126,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -86409,7 +306142,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -86417,15 +306150,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", @@ -86433,7 +306174,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -86441,7 +306198,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", @@ -86449,23 +306222,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -86473,15 +306246,31 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "deepseek/deepseek-chat", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", @@ -86489,15 +306278,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -86505,7 +306294,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -86513,7 +306302,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -86521,15 +306310,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", @@ -86537,7 +306334,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -86545,31 +306358,47 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "mistralai/mistral-saba", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "mistralai/mistral-nemo", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -86577,7 +306406,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -86585,7 +306430,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", @@ -86593,7 +306438,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", @@ -86601,7 +306446,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -86609,7 +306454,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -86617,7 +306462,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -86625,15 +306470,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", @@ -86641,7 +306494,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -86649,7 +306518,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", @@ -86657,31 +306542,47 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -86689,7 +306590,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", @@ -86697,7 +306598,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", @@ -86705,7 +306606,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -86713,7 +306614,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -86721,7 +306622,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -86729,15 +306630,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", @@ -86745,7 +306654,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -86753,7 +306678,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", @@ -86761,7 +306702,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", @@ -86769,31 +306718,39 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", @@ -86801,15 +306758,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -86817,23 +306774,31 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", @@ -86841,47 +306806,79 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -86889,15 +306886,31 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", @@ -86905,199 +306918,311 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", + "model": "google/gemma-3-27b-it", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "deepseek/deepseek-chat", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "microsoft/phi-4", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", + "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", + "model": "amazon/nova-micro-v1", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-3.5-turbo", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "mistralai/mistral-saba", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", + "model": "mistralai/mistral-nemo", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "google/gemma-3-27b-it", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "microsoft/phi-4", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -87105,7 +307230,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", @@ -87113,7 +307238,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", @@ -87121,15 +307246,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -87137,7 +307262,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -87145,15 +307270,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", @@ -87161,7 +307294,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -87169,7 +307318,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", @@ -87177,7 +307342,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", @@ -87185,39 +307358,47 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", @@ -87225,7 +307406,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -87233,7 +307414,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -87241,31 +307422,55 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -87273,31 +307478,47 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "mistralai/mistral-saba", "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "mistralai/mistral-nemo", "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -87305,15 +307526,31 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "deepseek/deepseek-chat", "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", @@ -87321,7 +307558,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", @@ -87329,7 +307566,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -87337,7 +307574,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -87345,7 +307582,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -87353,7 +307590,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", @@ -87361,7 +307606,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", @@ -87369,7 +307614,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -87377,31 +307638,47 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "mistralai/mistral-saba", "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "mistralai/mistral-nemo", "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -87409,7 +307686,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -87417,7 +307710,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", @@ -87425,7 +307718,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", @@ -87433,15 +307726,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -87449,7 +307742,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -87457,15 +307750,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", @@ -87473,7 +307774,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -87481,39 +307798,71 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "mistralai/mistral-saba", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "mistralai/mistral-nemo", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -87521,7 +307870,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", @@ -87529,7 +307878,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", @@ -87537,7 +307886,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -87545,7 +307894,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -87553,7 +307902,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -87561,1551 +307910,1751 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", + "model": "openai/gpt-4.1-nano", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", + "model": "openai/gpt-3.5-turbo", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "mistralai/mistral-saba", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "mistralai/mistral-nemo", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "en", + "model": "microsoft/phi-4", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "mmlu", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "mistralai/mistral-saba", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "model": "mistralai/mistral-nemo", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "es", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, - { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "model": "mistralai/mistral-saba", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", + "model": "microsoft/phi-4", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", + "model": "mistralai/mistral-saba", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "mistralai/mistral-nemo", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "model": "deepseek/deepseek-chat", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", + "model": "mistralai/mistral-saba", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", - "task": "mmlu", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", + "model": "microsoft/phi-4", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "id", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", + "model": "mistralai/mistral-saba", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 5 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 5 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "id", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "de", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", + "model": "mistralai/mistral-saba", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 5 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 5 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", + "model": "microsoft/phi-4", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 5 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", + "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "openai/gpt-3.5-turbo", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { - "model": "openai/gpt-4.1-nano", + "model": "mistralai/mistral-saba", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", + "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -89113,39 +309662,79 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-3.5-turbo", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", @@ -89153,31 +309742,47 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -89185,15 +309790,15 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", @@ -89201,7 +309806,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -89209,7 +309814,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -89217,7 +309822,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -89225,7 +309830,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", @@ -89233,7 +309846,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", @@ -89241,15 +309854,47 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", @@ -89257,23 +309902,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -89281,7 +309926,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -89289,23 +309950,23 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -89313,15 +309974,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -89329,7 +309990,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", @@ -89337,47 +310006,79 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "openai/gpt-3.5-turbo", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-saba", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -89385,31 +310086,47 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "deepseek/deepseek-chat", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -89417,7 +310134,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -89425,7 +310142,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -89433,15 +310150,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", @@ -89449,15 +310174,47 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", @@ -89465,103 +310222,159 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", + "model": "openai/gpt-4.1-nano", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "openai/gpt-3.5-turbo", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { - "model": "openai/gpt-4.1-nano", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", + "model": "mistralai/mistral-saba", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "mistralai/mistral-nemo", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", @@ -89569,31 +310382,47 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -89601,23 +310430,23 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -89625,7 +310454,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -89633,7 +310462,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -89641,7 +310470,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", @@ -89649,7 +310486,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", @@ -89657,15 +310494,47 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", @@ -89673,31 +310542,47 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -89705,7 +310590,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", @@ -89713,7 +310598,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", @@ -89721,7 +310606,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -89729,7 +310614,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -89737,7 +310622,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -89745,15 +310630,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", @@ -89761,15 +310654,47 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", @@ -89777,23 +310702,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -89801,7 +310726,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -89809,23 +310750,23 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -89833,7 +310774,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -89841,15 +310782,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", @@ -89857,7 +310806,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", @@ -89865,15 +310814,47 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", @@ -89881,7 +310862,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", @@ -89889,23 +310878,31 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -89913,15 +310910,15 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", @@ -89929,7 +310926,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -89937,7 +310934,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -89945,15 +310942,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", @@ -89961,15 +310966,31 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -89977,7 +310998,23 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", @@ -89985,31 +311022,47 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -90017,7 +311070,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", @@ -90025,15 +311078,15 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -90041,7 +311094,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -90049,7 +311102,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -90057,7 +311110,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", @@ -90065,7 +311126,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", @@ -90073,15 +311134,47 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", @@ -90089,31 +311182,47 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -90121,31 +311230,31 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -90153,47 +311262,95 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-3.5-turbo", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", @@ -90201,55 +311358,63 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "microsoft/phi-4", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -90257,63 +311422,103 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -90321,7 +311526,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -90329,1767 +311550,2079 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "vi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 6 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "en", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "vi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 6 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "vi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "fa", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "model": "microsoft/phi-4", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "model": "mistralai/mistral-saba", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "es", + "model": "microsoft/phi-4", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", + "model": "mistralai/mistral-saba", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ko", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "model": "mistralai/mistral-saba", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "model": "mistralai/mistral-nemo", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "google/gemma-3-27b-it", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", + "model": "microsoft/phi-4", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "model": "amazon/nova-micro-v1", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", + "model": "mistralai/mistral-saba", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "bcp_47": "fil", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "model": "microsoft/phi-4", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", + "model": "mistralai/mistral-saba", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", + "model": "microsoft/phi-4", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "id", + "model": "mistralai/mistral-saba", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "id", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 6 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 6 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "id", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 7 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "de", + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 7 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", + "model": "mistralai/mistral-saba", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", + "model": "mistralai/mistral-nemo", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "google/gemma-3-27b-it", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", + "model": "deepseek/deepseek-chat", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 7 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", + "model": "microsoft/phi-4", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 7 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", + "model": "openai/gpt-4.1-nano", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "en", + "model": "mistralai/mistral-saba", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "model": "microsoft/phi-4", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "mistralai/mistral-saba", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", + "model": "google/gemma-3-27b-it", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "deepseek/deepseek-chat", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "microsoft/phi-4", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "openai/gpt-4.1-nano", + "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", + "model": "amazon/nova-micro-v1", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-3.5-turbo", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "mistralai/mistral-saba", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "openai/gpt-4.1-nano", + "model": "mistralai/mistral-nemo", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "google/gemma-3-27b-it", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "microsoft/phi-4", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -92097,7 +313630,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", @@ -92105,7 +313638,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", @@ -92113,23 +313646,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -92137,63 +313670,119 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -92201,15 +313790,15 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", @@ -92217,7 +313806,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -92225,7 +313814,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -92233,7 +313822,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -92241,7 +313830,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4.1-nano", @@ -92249,7 +313846,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", @@ -92257,15 +313854,47 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemini-2.5-flash-preview", @@ -92273,7 +313902,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", @@ -92281,23 +313918,31 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -92305,7 +313950,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", @@ -92313,7 +313958,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", @@ -92321,7 +313966,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -92329,7 +313974,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -92337,7 +313982,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -92345,23 +313990,47 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -92369,39 +314038,71 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "mistralai/mistral-saba", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.5-flash-preview", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -92409,7 +314110,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", @@ -92417,7 +314118,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", @@ -92425,7 +314126,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -92433,31 +314134,39 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", @@ -92465,15 +314174,47 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemini-2.5-flash-preview", @@ -92481,31 +314222,47 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -92513,7 +314270,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", @@ -92521,31 +314278,31 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -92553,23 +314310,47 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -92577,31 +314358,47 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -92609,23 +314406,39 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", @@ -92633,95 +314446,151 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", @@ -92729,7 +314598,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", @@ -92737,7 +314606,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -92745,7 +314614,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -92753,7 +314622,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -92761,7 +314630,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4.1-nano", @@ -92769,71 +314646,119 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", "bcp_47": "id", "task": "mmlu", "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "id", "task": "mmlu", "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "mistralai/mistral-nemo", "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", @@ -92841,15 +314766,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -92857,39 +314782,79 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemini-2.5-flash-preview", @@ -92897,23 +314862,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -92921,15 +314886,31 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", @@ -92937,7 +314918,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", @@ -92945,7 +314926,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -92953,7 +314934,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -92961,39 +314942,79 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemini-2.5-flash-preview", @@ -93001,23 +315022,23 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -93025,1495 +315046,1655 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "model": "mistralai/mistral-saba", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "en", + "model": "mistralai/mistral-nemo", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "model": "microsoft/phi-4", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 - }, - { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "mistralai/mistral-saba", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "microsoft/phi-4", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "es", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "mistralai/mistral-saba", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fa", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "model": "microsoft/phi-4", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-saba", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "model": "microsoft/phi-4", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "bn", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "it", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", + "model": "mistralai/mistral-saba", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "model": "mistralai/mistral-nemo", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", + "model": "deepseek/deepseek-chat", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", + "model": "microsoft/phi-4", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", + "model": "amazon/nova-micro-v1", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", + "model": "mistralai/mistral-saba", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 7 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", + "model": "microsoft/phi-4", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "sw", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "id", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "id", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", + "bcp_47": "pl", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "id", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "de", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "de", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", + "model": "mistralai/mistral-saba", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 7 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", + "model": "microsoft/phi-4", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 8 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-4-maverick", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-3.5-turbo", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "mistralai/mistral-saba", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { - "model": "openai/gpt-4.1-nano", + "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "microsoft/phi-4", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -94521,15 +316702,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4.1-nano", @@ -94537,23 +316726,55 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "google/gemini-2.5-flash-preview", @@ -94561,47 +316782,63 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", @@ -94609,7 +316846,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -94617,7 +316854,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -94625,7 +316862,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -94633,15 +316870,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", @@ -94649,15 +316894,47 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "google/gemini-2.5-flash-preview", @@ -94665,7 +316942,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", @@ -94673,23 +316958,31 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -94697,15 +316990,15 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", @@ -94713,7 +317006,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -94721,15 +317014,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -94737,7 +317030,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4.1-nano", @@ -94745,79 +317046,127 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -94825,7 +317174,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -94833,7 +317182,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -94841,15 +317190,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", @@ -94857,15 +317214,47 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "google/gemini-2.5-flash-preview", @@ -94873,23 +317262,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -94897,7 +317286,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -94905,15 +317310,15 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", @@ -94921,7 +317326,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -94929,7 +317334,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -94937,7 +317342,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -94945,7 +317350,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "openai/gpt-4.1-nano", @@ -94953,7 +317366,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", @@ -94961,15 +317374,47 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "google/gemini-2.5-flash-preview", @@ -94977,55 +317422,71 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -95033,7 +317494,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -95041,7 +317502,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -95049,7 +317510,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4.1-nano", @@ -95057,7 +317526,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", @@ -95065,15 +317534,47 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "google/gemini-2.5-flash-preview", @@ -95081,23 +317582,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -95105,23 +317606,39 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "deepseek/deepseek-chat", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", @@ -95129,7 +317646,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -95137,7 +317654,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -95145,7 +317662,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -95153,15 +317670,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", @@ -95169,15 +317694,47 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemini-2.5-flash-preview", @@ -95185,23 +317742,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -95209,15 +317766,31 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", @@ -95225,15 +317798,15 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -95241,7 +317814,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -95249,15 +317822,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4.1-nano", @@ -95265,7 +317846,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", @@ -95273,15 +317854,47 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "google/gemini-2.5-flash-preview", @@ -95289,23 +317902,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -95313,23 +317926,39 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ru", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", @@ -95337,7 +317966,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -95345,7 +317974,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -95353,7 +317982,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -95361,7 +317990,15 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "openai/gpt-4.1-nano", @@ -95369,7 +318006,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", @@ -95377,7 +318014,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -95385,31 +318038,47 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 8 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "mistralai/mistral-saba", "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", + "model": "mistralai/mistral-nemo", "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -95417,7 +318086,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -95425,23 +318110,23 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -95449,7 +318134,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -95457,7 +318142,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -95465,7 +318150,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4.1-nano", @@ -95473,7 +318166,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", @@ -95481,15 +318174,47 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "id", "task": "mmlu", "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "google/gemini-2.5-flash-preview", @@ -95497,7 +318222,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", @@ -95505,127 +318238,191 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", + "model": "openai/gpt-4.1-nano", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-4-maverick", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-3.5-turbo", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "mistralai/mistral-saba", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { - "model": "openai/gpt-4.1-nano", + "model": "mistralai/mistral-nemo", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "google/gemma-3-27b-it", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "microsoft/phi-4", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -95633,31 +318430,31 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -95665,71 +318462,127 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4.1-nano", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "google/gemini-2.5-flash-preview", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -95737,1455 +318590,1599 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "en", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "model": "deepseek/deepseek-chat", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "model": "mistralai/mistral-saba", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", + "model": "microsoft/phi-4", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "hi", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "es", + "model": "mistralai/mistral-saba", + "bcp_47": "fa", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "model": "microsoft/phi-4", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "es", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "tr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "tr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ar", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", + "model": "mistralai/mistral-saba", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "fr", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "model": "microsoft/phi-4", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-saba", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", + "model": "mistralai/mistral-nemo", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", + "model": "google/gemma-3-27b-it", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", + "model": "deepseek/deepseek-chat", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "model": "microsoft/phi-4", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "pt", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ru", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", + "model": "mistralai/mistral-saba", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", + "model": "microsoft/phi-4", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", + "model": "mistralai/mistral-saba", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "id", + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", + "model": "microsoft/phi-4", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "id", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "de", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "de", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "de", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", + "model": "mistralai/mistral-saba", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "de", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", + "model": "microsoft/phi-4", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 9 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", + "model": "openai/gpt-4.1-mini", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4.1-nano", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-3.5-turbo", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "mistralai/mistral-saba", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "openai/gpt-4.1-nano", + "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "microsoft/phi-4", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -97193,7 +320190,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", @@ -97201,7 +320198,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", @@ -97209,7 +320206,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -97217,7 +320214,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -97225,7 +320222,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -97233,7 +320230,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "openai/gpt-4.1-nano", @@ -97241,7 +320246,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", @@ -97249,7 +320254,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -97257,7 +320278,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.5-flash-preview", @@ -97265,23 +320302,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -97289,7 +320326,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -97297,7 +320350,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", @@ -97305,7 +320358,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", @@ -97313,7 +320366,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -97321,7 +320374,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -97329,7 +320382,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -97337,7 +320390,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "openai/gpt-4.1-nano", @@ -97345,7 +320406,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", @@ -97353,7 +320414,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -97361,55 +320438,87 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "mistralai/mistral-saba", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", + "model": "mistralai/mistral-nemo", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", @@ -97417,7 +320526,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -97425,7 +320534,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -97433,7 +320542,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -97441,15 +320550,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", @@ -97457,7 +320574,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -97465,7 +320598,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemini-2.5-flash-preview", @@ -97473,23 +320622,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -97497,23 +320646,39 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "deepseek/deepseek-chat", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", + "model": "microsoft/phi-4", "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", @@ -97521,7 +320686,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -97529,7 +320694,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -97537,7 +320702,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -97545,7 +320710,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "openai/gpt-4.1-nano", @@ -97553,7 +320726,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", @@ -97561,7 +320734,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -97569,7 +320758,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.5-flash-preview", @@ -97577,31 +320782,47 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -97609,15 +320830,15 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", @@ -97625,7 +320846,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -97633,7 +320854,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -97641,7 +320862,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -97649,15 +320870,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", @@ -97665,7 +320894,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -97673,7 +320918,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.5-flash-preview", @@ -97681,7 +320942,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", @@ -97689,31 +320958,39 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "deepseek/deepseek-chat", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "microsoft/phi-4", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", @@ -97721,7 +320998,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", @@ -97729,7 +321006,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -97737,7 +321014,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -97745,7 +321022,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -97753,7 +321030,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4.1-nano", @@ -97761,7 +321046,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", @@ -97769,15 +321054,47 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.5-flash-preview", @@ -97785,23 +321102,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -97809,7 +321126,23 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -97817,15 +321150,15 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", @@ -97833,7 +321166,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -97841,7 +321174,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -97849,7 +321182,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -97857,7 +321190,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "openai/gpt-4.1-nano", @@ -97865,7 +321206,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", @@ -97873,7 +321214,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -97881,7 +321238,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemini-2.5-flash-preview", @@ -97889,23 +321262,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -97913,7 +321286,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -97921,15 +321310,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", @@ -97937,7 +321326,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -97945,7 +321334,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -97953,7 +321342,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -97961,7 +321350,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "openai/gpt-4.1-nano", @@ -97969,271 +321366,431 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "sw", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ru", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ru", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", + "model": "mistralai/mistral-saba", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", + "model": "mistralai/mistral-nemo", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ru", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", + "model": "google/gemma-3-27b-it", "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "deepseek/deepseek-chat", "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "microsoft/phi-4", "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 9 }, { - "model": "openai/gpt-4.1-nano", + "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", + "model": "amazon/nova-micro-v1", "bcp_47": "sw", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 9 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", + "model": "openai/gpt-4.1-mini", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", + "model": "openai/gpt-4.1-nano", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", + "model": "openai/gpt-3.5-turbo-0613", "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-3.5-turbo", "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "mistralai/mistral-saba", "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "openai/gpt-4.1-nano", + "model": "mistralai/mistral-nemo", "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", + "model": "google/gemini-2.5-flash-preview", "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "google/gemini-2.5-flash-preview", + "model": "google/gemma-3-27b-it", "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "id", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat", "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", + "model": "microsoft/phi-4", "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "id", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", @@ -98241,7 +321798,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", @@ -98249,7 +321806,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -98257,7 +321814,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -98265,7 +321822,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -98273,7 +321830,15 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "openai/gpt-4.1-nano", @@ -98281,7 +321846,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", @@ -98289,7 +321854,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -98297,7 +321878,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemini-2.5-flash-preview", @@ -98305,23 +321902,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "de", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", @@ -98329,7 +321926,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "de", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -98337,7 +321950,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", @@ -98345,7 +321958,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", @@ -98353,7 +321966,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -98361,7 +321974,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -98369,7 +321982,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", @@ -98377,15 +321990,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "openai/gpt-4.1-nano", + "model": "openai/gpt-4.1-mini", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-nano", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", @@ -98393,7 +322014,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", @@ -98401,7 +322038,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.5-flash-preview", @@ -98409,31 +322062,47 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", + "model": "google/gemma-3-27b-it", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 9 }, { "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ja", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ja", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -98441,7 +322110,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", @@ -98449,11 +322118,11 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -98461,7 +322130,7 @@ }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -98469,7 +322138,7 @@ }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -98477,7 +322146,15 @@ }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "te", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -98485,263 +322162,263 @@ }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "en", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "en", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "en", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "mistralai/mistral-saba", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "en", + "model": "mistralai/mistral-nemo", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "microsoft/phi-4", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "amazon/nova-micro-v1", + "bcp_47": "te", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "zh", + "model": "openai/gpt-4.1-mini", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "openai/gpt-4.1-nano", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "hi", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "hi", + "model": "mistralai/mistral-saba", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "hi", + "model": "mistralai/mistral-nemo", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "hi", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "hi", + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "hi", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "hi", + "model": "deepseek/deepseek-chat", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "hi", + "model": "microsoft/phi-4", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -98749,7 +322426,7 @@ }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "hi", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -98757,7 +322434,7 @@ }, { "model": "amazon/nova-micro-v1", - "bcp_47": "hi", + "bcp_47": "vi", "task": "mmlu", "metric": "accuracy", "score": 0, @@ -98765,7 +322442,7 @@ }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -98773,7 +322450,7 @@ }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -98781,7 +322458,7 @@ }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "es", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -98789,279 +322466,287 @@ }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "es", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "es", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "es", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "es", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "es", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "es", + "model": "mistralai/mistral-saba", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "es", + "model": "mistralai/mistral-nemo", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "es", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ar", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ar", + "model": "deepseek/deepseek-chat", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ar", + "model": "microsoft/phi-4", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ar", + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ar", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ar", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ar", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ar", + "model": "openai/gpt-4.1-mini", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ar", + "model": "openai/gpt-4.1-nano", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "ar", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "fr", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "fr", + "model": "mistralai/mistral-saba", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "fr", + "model": "mistralai/mistral-nemo", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "fr", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "fr", + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "fr", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "fr", + "model": "deepseek/deepseek-chat", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "fr", + "model": "microsoft/phi-4", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "fr", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 0, @@ -99069,7 +322754,7 @@ }, { "model": "amazon/nova-micro-v1", - "bcp_47": "fr", + "bcp_47": "tr", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -99077,7 +322762,7 @@ }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -99085,7 +322770,7 @@ }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -99093,7 +322778,7 @@ }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "bn", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -99101,247 +322786,247 @@ }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "bn", + "model": "openai/gpt-4.1-mini", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "openai/gpt-4.1-nano", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "bn", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "bn", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "bn", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "bn", + "model": "mistralai/mistral-saba", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-nemo", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "bn", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "pt", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "pt", + "model": "deepseek/deepseek-chat", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "pt", + "model": "microsoft/phi-4", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "pt", + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "pt", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "pt", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "pt", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "pt", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "pt", + "model": "openai/gpt-4.1-mini", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "pt", + "model": "openai/gpt-4.1-nano", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ru", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ru", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ru", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "ru", + "model": "mistralai/mistral-saba", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ru", + "model": "mistralai/mistral-nemo", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ru", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ru", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -99349,23 +323034,31 @@ }, { "model": "google/gemma-3-27b-it", - "bcp_47": "ru", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ru", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ru", + "model": "deepseek/deepseek-chat", + "bcp_47": "it", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -99373,7 +323066,7 @@ }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ru", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -99381,7 +323074,7 @@ }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ru", + "bcp_47": "it", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -99389,7 +323082,7 @@ }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "sw", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -99397,7 +323090,7 @@ }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -99405,7 +323098,7 @@ }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "sw", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -99413,247 +323106,247 @@ }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "sw", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "sw", + "model": "openai/gpt-4.1-mini", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "sw", + "model": "openai/gpt-4.1-nano", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "sw", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "sw", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "sw", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "sw", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "sw", + "model": "mistralai/mistral-saba", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "sw", + "model": "mistralai/mistral-nemo", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "sw", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "id", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "id", + "model": "microsoft/phi-4", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "id", + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "id", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "id", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "id", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "id", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "id", + "model": "openai/gpt-4.1-mini", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "id", + "model": "openai/gpt-4.1-nano", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "de", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "de", + "model": "openai/gpt-3.5-turbo", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "de", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "openai/gpt-4.1-nano", - "bcp_47": "de", + "model": "mistralai/mistral-saba", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "de", + "model": "mistralai/mistral-nemo", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "de", + "model": "google/gemini-2.5-flash-preview", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "google/gemini-2.5-flash-preview", - "bcp_47": "de", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -99661,23 +323354,31 @@ }, { "model": "google/gemma-3-27b-it", - "bcp_47": "de", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "de", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { - "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "de", + "model": "deepseek/deepseek-chat", + "bcp_47": "pl", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -99685,47 +323386,55 @@ }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "de", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "de", + "bcp_47": "pl", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ja", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "ja", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "ja", + "bcp_47": "ha", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4.1-mini", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -99733,7 +323442,7 @@ }, { "model": "openai/gpt-4.1-nano", - "bcp_47": "ja", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 0, @@ -99741,39 +323450,71 @@ }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ja", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo-0613", + "bcp_47": "ha", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-3.5-turbo", + "bcp_47": "ha", + "task": "mmlu", + "metric": "accuracy", + "score": 0, "sentence_nr": 9 }, { "model": "mistralai/mistral-small-3.1-24b-instruct", - "bcp_47": "ja", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, + { + "model": "mistralai/mistral-saba", + "bcp_47": "ha", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "ha", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, { "model": "google/gemini-2.5-flash-preview", - "bcp_47": "ja", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "ja", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "ja", + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 0, @@ -99781,15 +323522,31 @@ }, { "model": "deepseek/deepseek-chat-v3-0324", - "bcp_47": "ja", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat", + "bcp_47": "ha", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4", + "bcp_47": "ha", + "task": "mmlu", + "metric": "accuracy", + "score": 0, "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "ja", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", "score": 0, @@ -99797,10 +323554,10 @@ }, { "model": "amazon/nova-micro-v1", - "bcp_47": "ja", + "bcp_47": "ha", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 } ]